diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 412f0432e85cc..967938c1526c6 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -51,6 +51,10 @@ class SIShrinkInstructions {
                     unsigned SubReg) const;
   bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
                        unsigned SubReg) const;
+  Register trySwapCndOperands(MachineInstr &MI) const;
+  bool shouldSwapCndOperands(Register Reg,
+                             std::vector<MachineInstr *> &UsesToProcess) const;
+  unsigned getInverseCompareOpcode(MachineInstr &MI) const;
   TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
                                                    unsigned I) const;
   void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
@@ -830,6 +834,215 @@ bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
   return true;
 }
 
+unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  // int 32
+  case AMDGPU::V_CMP_EQ_I32_e64:
+    return AMDGPU::V_CMP_NE_I32_e64;
+  case AMDGPU::V_CMP_NE_I32_e64:
+    return AMDGPU::V_CMP_EQ_I32_e64;
+  case AMDGPU::V_CMP_GE_I32_e64:
+    return AMDGPU::V_CMP_LT_I32_e64;
+  case AMDGPU::V_CMP_LE_I32_e64:
+    return AMDGPU::V_CMP_GT_I32_e64;
+  case AMDGPU::V_CMP_GT_I32_e64:
+    return AMDGPU::V_CMP_LE_I32_e64;
+  case AMDGPU::V_CMP_LT_I32_e64:
+    return AMDGPU::V_CMP_GE_I32_e64;
+  // int 64
+  case AMDGPU::V_CMP_EQ_I64_e64:
+    return AMDGPU::V_CMP_NE_I64_e64;
+  case AMDGPU::V_CMP_NE_I64_e64:
+    return AMDGPU::V_CMP_EQ_I64_e64;
+  case AMDGPU::V_CMP_GE_I64_e64:
+    return AMDGPU::V_CMP_LT_I64_e64;
+  case AMDGPU::V_CMP_LE_I64_e64:
+    return AMDGPU::V_CMP_GT_I64_e64;
+  case AMDGPU::V_CMP_GT_I64_e64:
+    return AMDGPU::V_CMP_LE_I64_e64;
+  case AMDGPU::V_CMP_LT_I64_e64:
+    return AMDGPU::V_CMP_GE_I64_e64;
+  // unsigned 32
+  case AMDGPU::V_CMP_EQ_U32_e64:
+    return AMDGPU::V_CMP_NE_U32_e64;
+  case AMDGPU::V_CMP_NE_U32_e64:
+    return AMDGPU::V_CMP_EQ_U32_e64;
+  case AMDGPU::V_CMP_GE_U32_e64:
+    return AMDGPU::V_CMP_LT_U32_e64;
+  case AMDGPU::V_CMP_LE_U32_e64:
+    return AMDGPU::V_CMP_GT_U32_e64;
+  case AMDGPU::V_CMP_GT_U32_e64:
+    return AMDGPU::V_CMP_LE_U32_e64;
+  case AMDGPU::V_CMP_LT_U32_e64:
+    return AMDGPU::V_CMP_GE_U32_e64;
+  // unsigned 64
+  case AMDGPU::V_CMP_EQ_U64_e64:
+    return AMDGPU::V_CMP_NE_U64_e64;
+  case AMDGPU::V_CMP_NE_U64_e64:
+    return AMDGPU::V_CMP_EQ_U64_e64;
+  case AMDGPU::V_CMP_GE_U64_e64:
+    return AMDGPU::V_CMP_LT_U64_e64;
+  case AMDGPU::V_CMP_LE_U64_e64:
+    return AMDGPU::V_CMP_GT_U64_e64;
+  case AMDGPU::V_CMP_GT_U64_e64:
+    return AMDGPU::V_CMP_LE_U64_e64;
+  case AMDGPU::V_CMP_LT_U64_e64:
+    return AMDGPU::V_CMP_GE_U64_e64;
+  // float 32
+  case AMDGPU::V_CMP_EQ_F32_e64:
+    return AMDGPU::V_CMP_NEQ_F32_e64;
+  case AMDGPU::V_CMP_NEQ_F32_e64:
+    return AMDGPU::V_CMP_EQ_F32_e64;
+  case AMDGPU::V_CMP_GE_F32_e64:
+    return AMDGPU::V_CMP_NGE_F32_e64;
+  case AMDGPU::V_CMP_NGE_F32_e64:
+    return AMDGPU::V_CMP_GE_F32_e64;
+  case AMDGPU::V_CMP_LE_F32_e64:
+    return AMDGPU::V_CMP_NLE_F32_e64;
+  case AMDGPU::V_CMP_NLE_F32_e64:
+    return AMDGPU::V_CMP_LE_F32_e64;
+  case AMDGPU::V_CMP_GT_F32_e64:
+    return AMDGPU::V_CMP_NGT_F32_e64;
+  case AMDGPU::V_CMP_NGT_F32_e64:
+    return AMDGPU::V_CMP_GT_F32_e64;
+  case AMDGPU::V_CMP_LT_F32_e64:
+    return AMDGPU::V_CMP_NLT_F32_e64;
+  case AMDGPU::V_CMP_NLT_F32_e64:
+    return AMDGPU::V_CMP_LT_F32_e64;
+  case AMDGPU::V_CMP_LG_F32_e64:
+    return AMDGPU::V_CMP_NLG_F32_e64;
+  case AMDGPU::V_CMP_NLG_F32_e64:
+    return AMDGPU::V_CMP_LG_F32_e64;
+  case AMDGPU::V_CMP_O_F32_e64:
+    return AMDGPU::V_CMP_U_F32_e64;
+  case AMDGPU::V_CMP_U_F32_e64:
+    return AMDGPU::V_CMP_O_F32_e64;
+  // float 64
+  case AMDGPU::V_CMP_EQ_F64_e64:
+    return AMDGPU::V_CMP_NEQ_F64_e64;
+  case AMDGPU::V_CMP_NEQ_F64_e64:
+    return AMDGPU::V_CMP_EQ_F64_e64;
+  case AMDGPU::V_CMP_GE_F64_e64:
+    return AMDGPU::V_CMP_NGE_F64_e64;
+  case AMDGPU::V_CMP_NGE_F64_e64:
+    return AMDGPU::V_CMP_GE_F64_e64;
+  case AMDGPU::V_CMP_LE_F64_e64:
+    return AMDGPU::V_CMP_NLE_F64_e64;
+  case AMDGPU::V_CMP_NLE_F64_e64:
+    return AMDGPU::V_CMP_LE_F64_e64;
+  case AMDGPU::V_CMP_GT_F64_e64:
+    return AMDGPU::V_CMP_NGT_F64_e64;
+  case AMDGPU::V_CMP_NGT_F64_e64:
+    return AMDGPU::V_CMP_GT_F64_e64;
+  case AMDGPU::V_CMP_LT_F64_e64:
+    return AMDGPU::V_CMP_NLT_F64_e64;
+  case AMDGPU::V_CMP_NLT_F64_e64:
+    return AMDGPU::V_CMP_LT_F64_e64;
+  case AMDGPU::V_CMP_LG_F64_e64:
+    return AMDGPU::V_CMP_NLG_F64_e64;
+  case AMDGPU::V_CMP_NLG_F64_e64:
+    return AMDGPU::V_CMP_LG_F64_e64;
+  case AMDGPU::V_CMP_O_F64_e64:
+    return AMDGPU::V_CMP_U_F64_e64;
+  case AMDGPU::V_CMP_U_F64_e64:
+    return AMDGPU::V_CMP_O_F64_e64;
+  default:
+    return 0;
+  }
+}
+
+bool SIShrinkInstructions::shouldSwapCndOperands(
+    Register Reg, std::vector<MachineInstr *> &UsesToProcess) const {
+  auto AllUses = MRI->use_nodbg_instructions(Reg);
+  int InstsToSwap = 0;
+
+  for (auto &UseInst : AllUses) {
+    if (UseInst.getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+      return false;
+
+    UsesToProcess.push_back(&UseInst);
+
+    MachineOperand &Src0 = UseInst.getOperand(2);
+    MachineOperand &Src1 = UseInst.getOperand(4);
+
+    // If the instruction has source modifiers it cannot be converted to VOP2.
+    if (UseInst.getOperand(1).getImm() != SISrcMods::NONE ||
+        UseInst.getOperand(3).getImm() != SISrcMods::NONE)
+      continue;
+
+    bool Src0IsVGPR = Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg());
+    bool Src1IsVGPR = Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg());
+
+    // Src1 always has to be a VGPR in VOP2.
+    if (!Src0IsVGPR && Src1IsVGPR)
+      InstsToSwap--;
+    else if (Src0IsVGPR && !Src1IsVGPR)
+      InstsToSwap++;
+  }
+  return InstsToSwap > 0;
+}
+
+static void swapCndOperands(MachineInstr &MI) {
+  MachineOperand &Op2 = MI.getOperand(2);
+  MachineOperand Op4 = MI.getOperand(4);
+
+  if (Op2.isReg()) {
+    MI.getOperand(4).ChangeToRegister(
+        Op2.getReg(), Op2.isDef(), Op2.isImplicit(), Op2.isKill(), Op2.isDead(),
+        Op2.isUndef(), Op2.isDebug());
+    MI.getOperand(4).setSubReg(Op2.getSubReg());
+  } else if (Op2.isImm()) {
+    MI.getOperand(4).ChangeToImmediate(Op2.getImm());
+  }
+
+  if (Op4.isReg()) {
+    Op2.ChangeToRegister(Op4.getReg(), Op4.isDef(), Op4.isImplicit(),
+                         Op4.isKill(), Op4.isDead(), Op4.isUndef(),
+                         Op4.isDebug());
+    Op2.setSubReg(Op4.getSubReg());
+  } else if (Op4.isImm()) {
+    Op2.ChangeToImmediate(Op4.getImm());
+  }
+
+  auto Op1Imm = MI.getOperand(1).getImm();
+  auto Op3Imm = MI.getOperand(3).getImm();
+  MI.getOperand(1).setImm(Op3Imm);
+  MI.getOperand(3).setImm(Op1Imm);
+}
+
+Register SIShrinkInstructions::trySwapCndOperands(MachineInstr &MI) const {
+  Register Reg = MI.getOperand(0).getReg();
+
+  unsigned Opcode = getInverseCompareOpcode(MI);
+  std::vector<MachineInstr *> UsesToProcess;
+  if (!Opcode ||
+      !SIShrinkInstructions::shouldSwapCndOperands(Reg, UsesToProcess))
+    return Reg;
+
+  auto DL = MI.getDebugLoc();
+  Register NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+
+  MachineInstrBuilder InverseCompare =
+      BuildMI(*MI.getParent(), MI, DL, TII->get(Opcode), NewVCC);
+  InverseCompare->setFlags(MI.getFlags());
+
+  unsigned OpNum = MI.getNumExplicitOperands();
+  for (unsigned Idx = 1; Idx < OpNum; Idx++) {
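+    // Transfer the remaining explicit operands (source modifiers and sources)
+    // of the original compare to the inverted compare, clearing kill flags.
+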
MachineOperand &Op = MI.getOperand(Idx); + InverseCompare.add(Op); + if (Op.isReg() && Op.isKill()) + InverseCompare->getOperand(Idx).setIsKill(false); + } + + for (auto Use : UsesToProcess) { + swapCndOperands(*Use); + } + + MRI->replaceRegWith(Reg, NewVCC); + MI.eraseFromParent(); + return NewVCC; +} + bool SIShrinkInstructions::run(MachineFunction &MF) { this->MF = &MF; @@ -840,6 +1053,11 @@ bool SIShrinkInstructions::run(MachineFunction &MF) { unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC; +<<<<<<< HEAD +======= + std::vector I1Defs; + +>>>>>>> 1336afc5defe (update tests) for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { @@ -997,6 +1215,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) { // dst. Register DstReg = Op0.getReg(); if (DstReg.isVirtual()) { + DstReg = trySwapCndOperands(MI); // VOPC instructions can only write to the VCC register. We can't // force them to use VCC here, because this is only one register and // cannot deal with sequences which would require multiple copies of diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll index b60f4c1250264..37af231729e20 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll @@ -145,8 +145,8 @@ define amdgpu_ps float @select_vcc_s_s(float %cmp0, float %cmp1, float inreg %sr ; GFX10PLUS-LABEL: select_vcc_s_s: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: v_mov_b32_e32 v2, s3 -; GFX10PLUS-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v2, s2, vcc_lo +; GFX10PLUS-NEXT: v_cmp_neq_f32_e32 vcc_lo, v0, v1 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, s2, v2, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog %cmp = fcmp oeq float %cmp0, %cmp1 %result = select i1 %cmp, float %src0, float %src1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 9b35920f8547a..0de653914690a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -13,9 +13,9 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) { ; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, 2.0, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc @@ -34,18 +34,18 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) { ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 
0x40e00000, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, 0x41000000, v1, vcc_lo ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x float> , i32 %sel @@ -138,18 +138,18 @@ define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s9, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s5, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s7, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, s9, v1, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <8 x float> %vec, i32 %sel @@ -351,6 +351,7 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { ; GFX10-NEXT: s_mov_b64 s[8:9], 3 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +<<<<<<< HEAD ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 ; GFX10-NEXT: s_mov_b64 s[6:7], 4 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, s4 @@ -373,6 +374,31 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s6, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s7, vcc_lo +======= +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10-NEXT: s_mov_b64 s[6:7], 4 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s5, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10-NEXT: s_mov_b64 s[4:5], 5 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10-NEXT: s_mov_b64 s[6:7], 6 
+; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s5, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10-NEXT: s_mov_b64 s[4:5], 7 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10-NEXT: s_mov_b64 s[6:7], 8 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s5, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, s6, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, s7, v2, vcc_lo +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v8i64_const_s_v: @@ -386,6 +412,7 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { ; GFX11-NEXT: s_mov_b64 s[4:5], 3 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo +<<<<<<< HEAD ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 ; GFX11-NEXT: s_mov_b64 s[2:3], 4 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, s0 @@ -408,6 +435,31 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s5, s0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo +======= +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX11-NEXT: s_mov_b64 s[2:3], 4 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX11-NEXT: s_mov_b64 s[0:1], 5 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX11-NEXT: s_mov_b64 s[2:3], 6 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX11-NEXT: s_mov_b64 s[0:1], 7 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX11-NEXT: s_mov_b64 s[2:3], 8 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s1, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, s2, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, s3, v2, vcc_lo +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x i64> , i32 %sel @@ -579,24 +631,24 @@ define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo 
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s16, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s17, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s9, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s10, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s11, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s12, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s13, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s14, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s15, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, s16, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, s17, v2, vcc_lo ; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_endpgm ; @@ -606,24 +658,24 @@ define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s16, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s17, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s9, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s10, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s11, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s12, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s13, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s14, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s15, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, s16, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, s17, v2, vcc_lo ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_endpgm entry: @@ -2121,14 +2173,14 @@ define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo -; GFX10PLUS-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s7, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s5, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, s7, v1, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <6 x float> %vec, i32 %sel @@ -2267,16 +2319,16 @@ define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s8, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s5, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s7, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, s8, v1, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <7 x float> %vec, i32 %sel @@ -2439,18 +2491,18 @@ define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s9, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; 
GFX10-NEXT: v_cndmask_b32_e32 v1, s10, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s11, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, s12, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, s13, v2, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: ; return to shader part epilog @@ -2461,18 +2513,18 @@ define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s9, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s10, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s11, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, s12, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, s13, v2, vcc_lo ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: ; return to shader part epilog @@ -2665,27 +2717,27 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v_bitcast(<14 x float> inreg %userD ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: v_mov_b32_e32 v2, s5 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX10-NEXT: s_mov_b32 s0, s14 +; GFX10-NEXT: s_mov_b32 s0, s15 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s9, v2, 
vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s10, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s11, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s12, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s13, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s14, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v2, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: ; return to shader part epilog @@ -2694,27 +2746,27 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v_bitcast(<14 x float> inreg %userD ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-NEXT: s_mov_b32 s0, s14 +; GFX11-NEXT: s_mov_b32 s0, s15 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s9, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s10, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s11, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s12, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s13, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s14, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v2, vcc_lo ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: ; return to shader part epilog @@ -2791,27 +2843,27 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: v_mov_b32_e32 v2, s5 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX10-NEXT: s_mov_b32 s0, s14 +; GFX10-NEXT: s_mov_b32 s0, s15 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 
v1, s6, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s9, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s10, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s11, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s12, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s13, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s14, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v2, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 ; GFX10-NEXT: ; return to shader part epilog @@ -2820,27 +2872,27 @@ define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-NEXT: s_mov_b32 s0, s14 +; GFX11-NEXT: s_mov_b32 s0, s15 ; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s7, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s9, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s10, 
v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s11, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s12, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s13, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s14, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v2, vcc_lo ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 ; GFX11-NEXT: ; return to shader part epilog @@ -3388,9 +3440,9 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) { ; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc @@ -3432,34 +3484,34 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s4, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v1, vcc_lo +; GFX10-NEXT: 
v_cmp_ne_u32_e32 vcc_lo, 8, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41100000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 9, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41200000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 10, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41300000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 11, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41400000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 12, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41500000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 13, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41600000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 14, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41700000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 15, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v0, s4, v1, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v15f32_const_s_v: @@ -3467,34 +3519,34 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 8, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41100000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 9, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41200000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 10, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 
0x41300000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 11, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41400000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 12, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41500000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 13, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41600000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 14, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41700000, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 15, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, s0, v1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <15 x float> , i32 %sel @@ -3605,34 +3657,34 @@ define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel ; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s9, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s11, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s15, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s16, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s5, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s7, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s9, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 8, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s10, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 9, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s11, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 10, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s12, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 11, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s13, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 12, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s14, v1, 
vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 13, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s15, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 14, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s16, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 15, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, s0, v1, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %ext = extractelement <15 x float> %vec, i32 %sel @@ -3706,8 +3758,8 @@ define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) { ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 15, v15 +; GFX10-NEXT: v_cndmask_b32_e32 v0, s4, v0, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v15f32_v_v: @@ -3741,8 +3793,8 @@ define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 15, v15 +; GFX11-NEXT: v_cndmask_b32_e32 v0, s0, v0, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <15 x float> %vec, i32 %sel @@ -3977,8 +4029,8 @@ define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) { ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 15, v15 +; GFX10-NEXT: v_cndmask_b32_e32 v0, s4, v0, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: dyn_extract_v15f32_v_v_offset3: @@ -4013,8 +4065,8 @@ define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) { ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 15, v15 +; GFX11-NEXT: v_cndmask_b32_e32 v0, s0, v0, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: %add = add i32 %sel, 3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll index fc81e16d68e98..173c9cd3b84b5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -6241,6 +6241,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, ; GFX10-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v13 ; GFX10-NEXT: v_lshlrev_b64 v[4:5], v12, s[0:1] +<<<<<<< HEAD ; GFX10-NEXT: v_cmp_gt_u32_e64 s1, 64, v13 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v12 ; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc_lo @@ -6262,6 +6263,32 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s9, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s1 ; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s1 +======= +; GFX10-NEXT: v_or_b32_e32 v2, v2, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, 0xffffffc0, v13 +; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] +; 
GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1] +; GFX10-NEXT: v_or_b32_e32 v3, v3, v1 +; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 +; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[6:7] +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v13 +; GFX10-NEXT: v_or_b32_e32 v6, v6, v8 +; GFX10-NEXT: v_or_b32_e32 v7, v7, v9 +; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo +; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, s8, v0, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v6, s2, v8, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v7, s3, v10, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s9, v1, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX10-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX10-NEXT: v_or_b32_e32 v1, v5, v1 ; GFX10-NEXT: v_or_b32_e32 v2, v6, v2 @@ -6277,6 +6304,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, ; GFX11-NEXT: s_mov_b32 s8, 0 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v12 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[2:3] +<<<<<<< HEAD ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12 ; GFX11-NEXT: v_and_b32_e32 v13, 0x7f, v6 ; GFX11-NEXT: v_add_nc_u32_e32 v7, 0xffffffc0, v12 @@ -6316,6 +6344,37 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s1 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s1 +======= +; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13 +; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12 +; GFX11-NEXT: v_lshrrev_b64 v[6:7], v13, s[8:9] +; GFX11-NEXT: v_lshrrev_b64 v[2:3], v3, s[0:1] +; GFX11-NEXT: v_cmp_ne_u32_e64 s4, 0, v12 +; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] +; GFX11-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1] +; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 +; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 0, v13 +; GFX11-NEXT: v_or_b32_e32 v2, v2, v0 +; GFX11-NEXT: v_add_nc_u32_e32 v0, 0xffffffc0, v13 +; GFX11-NEXT: v_or_b32_e32 v3, v3, v1 +; GFX11-NEXT: v_or_b32_e32 v6, v6, v8 +; GFX11-NEXT: v_or_b32_e32 v7, v7, v9 +; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo +; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[6:7] +; GFX11-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo +; GFX11-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v6, s2, v8, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v7, s3, v10, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v0, s8, v0, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v1, s9, v1, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_or_b32_e32 v2, v6, v2 +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX11-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_or_b32_e32 v1, v5, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index 238cc06fc7f7c..e0093c4982bbb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -6284,6 +6284,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, ; GFX10-NEXT: v_or_b32_e32 v8, v9, v11 ; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc_lo ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7] +<<<<<<< HEAD ; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v13 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc_lo @@ -6300,6 +6301,27 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, ; GFX10-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX10-NEXT: v_or_b32_e32 v1, v5, v1 ; GFX10-NEXT: v_or_b32_e32 v2, v6, v2 +======= +; GFX10-NEXT: v_or_b32_e32 v5, v5, v9 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v13 +; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo +; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s2, 0, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, s4, v0, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v5, s8, v8, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v7, s9, v10, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s5, v1, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 +; GFX10-NEXT: v_or_b32_e32 v0, v6, v0 +; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX10-NEXT: v_or_b32_e32 v2, v5, v2 +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX10-NEXT: v_or_b32_e32 v3, v7, v3 ; GFX10-NEXT: ; return to shader part epilog ; @@ -6333,6 +6355,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc_lo ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7] +<<<<<<< HEAD ; GFX11-NEXT: v_cndmask_b32_e32 v7, v7, v2, vcc_lo ; GFX11-NEXT: v_lshlrev_b64 v[4:5], v12, s[10:11] ; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v12 @@ -6349,6 +6372,26 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX11-NEXT: v_or_b32_e32 v1, v5, v1 +======= +; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 0, v13 +; GFX11-NEXT: v_or_b32_e32 v4, v4, v8 +; GFX11-NEXT: v_or_b32_e32 v5, v5, v9 +; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo +; GFX11-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 +; GFX11-NEXT: v_cmp_ne_u32_e64 s2, 0, v12 +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0 +; GFX11-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_cndmask_b32_e64 v0, s4, v0, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, s8, v8, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v7, s9, v10, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, s5, v1, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 +; GFX11-NEXT: v_or_b32_e32 v0, v6, v0 +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | 
instid1(VALU_DEP_4) ; GFX11-NEXT: v_or_b32_e32 v2, v6, v2 ; GFX11-NEXT: v_or_b32_e32 v3, v7, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll index 2eb7486a2684d..261ca8154cbae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll @@ -2335,17 +2335,17 @@ define amdgpu_ps void @insertelement_s_v8i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX10-NEXT: s_and_b32 s1, s4, 0xffff ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 2, v6 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 2, v6 ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v6 ; GFX10-NEXT: v_lshlrev_b32_e64 v2, v1, 0xffff ; GFX10-NEXT: v_lshlrev_b32_e64 v4, v1, s1 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v6 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 3, v6 ; GFX10-NEXT: v_not_b32_e32 v5, v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s9 ; GFX10-NEXT: v_cndmask_b32_e32 v0, s8, v0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s10, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v0, s11, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v0, s10, v0, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v7, s11, v0, s1 ; GFX10-NEXT: v_mov_b32_e32 v0, s8 ; GFX10-NEXT: v_mov_b32_e32 v1, s9 ; GFX10-NEXT: v_mov_b32_e32 v2, s10 @@ -2355,8 +2355,8 @@ define amdgpu_ps void @insertelement_s_v8i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v7, s2 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v7, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s1 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm ; @@ -2368,7 +2368,7 @@ define amdgpu_ps void @insertelement_s_v8i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX11-NEXT: s_and_b32 s1, s4, 0xffff ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 2, v6 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 2, v6 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v6 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s9 :: v_dual_lshlrev_b32 v1, 4, v1 @@ -2376,11 +2376,11 @@ define amdgpu_ps void @insertelement_s_v8i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_lshlrev_b32_e64 v2, v1, 0xffff ; GFX11-NEXT: v_lshlrev_b32_e64 v4, v1, s1 -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 3, v6 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s10, s0 +; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 3, v6 +; GFX11-NEXT: v_cndmask_b32_e64 v0, s10, v0, s0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_not_b32_e32 v5, v2 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v0, s11, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v7, s11, v0, s1 ; GFX11-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9 ; GFX11-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s11 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -2388,9 +2388,14 @@ define amdgpu_ps void @insertelement_s_v8i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX11-NEXT: v_mov_b32_e32 v4, 0 ; GFX11-NEXT: v_mov_b32_e32 v5, 0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v7, s2 +<<<<<<< HEAD ; GFX11-NEXT: v_cndmask_b32_e32 v1, 
v1, v7, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1 +======= +; GFX11-NEXT: v_cndmask_b32_e64 v2, v7, v2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s1 +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_endpgm %vec = load <8 x i16>, ptr addrspace(4) %ptr @@ -2518,8 +2523,8 @@ define amdgpu_ps void @insertelement_s_v8i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX10-NEXT: v_and_b32_e32 v2, 1, v1 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v2 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 2, v6 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v6 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 2, v6 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 3, v6 ; GFX10-NEXT: s_mov_b32 null, 0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v6 ; GFX10-NEXT: v_lshlrev_b32_e64 v3, v2, 0xffff @@ -2528,8 +2533,8 @@ define amdgpu_ps void @insertelement_s_v8i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v1, s7, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s6, v1, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v7, s7, v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-NEXT: v_mov_b32_e32 v2, s6 @@ -2539,8 +2544,8 @@ define amdgpu_ps void @insertelement_s_v8i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v7, s2 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v7, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s1 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm ; @@ -2552,8 +2557,8 @@ define amdgpu_ps void @insertelement_s_v8i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 2, v6 -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 3, v6 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 2, v6 +; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 3, v6 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v6 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_lshlrev_b32 v2, 4, v2 @@ -2562,10 +2567,10 @@ define amdgpu_ps void @insertelement_s_v8i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX11-NEXT: v_lshlrev_b32_e64 v3, v2, 0xffff ; GFX11-NEXT: v_lshlrev_b32_e32 v4, v2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v1, s6, v1, s0 ; GFX11-NEXT: v_not_b32_e32 v5, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v7, v1, s7, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v7, s7, v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 ; GFX11-NEXT: v_mov_b32_e32 v3, s7 ; GFX11-NEXT: v_and_or_b32 v7, v7, v5, v4 @@ -2574,9 +2579,14 @@ define amdgpu_ps void @insertelement_s_v8i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX11-NEXT: v_mov_b32_e32 v5, 0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v7, s2 +<<<<<<< HEAD ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0 
; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1 +======= +; GFX11-NEXT: v_cndmask_b32_e64 v2, v7, v2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s1 +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_endpgm %vec = load <8 x i16>, ptr addrspace(4) %ptr @@ -3752,26 +3762,26 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(ptr addrspace(4) inreg %ptr, i ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: s_and_b32 s5, s4, 0xffff ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 2, v12 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v12 -; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 4, v12 -; GFX10-NEXT: v_cmp_eq_u32_e64 s3, 5, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 2, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 3, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s2, 4, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s3, 5, v12 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 6, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 6, v12 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v12 ; GFX10-NEXT: v_lshlrev_b32_e64 v2, v0, 0xffff ; GFX10-NEXT: v_lshlrev_b32_e64 v8, v0, s5 -; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 7, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 7, v12 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s9 ; GFX10-NEXT: v_not_b32_e32 v9, v2 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s11, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s13, s3 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s14, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v10, v1, s15, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s10, v1, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s11, v1, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s12, v1, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s13, v1, s3 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s14, v1, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v10, s15, v1, s5 ; GFX10-NEXT: v_mov_b32_e32 v0, s8 ; GFX10-NEXT: v_mov_b32_e32 v1, s9 ; GFX10-NEXT: v_mov_b32_e32 v2, s10 @@ -3786,13 +3796,13 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(ptr addrspace(4) inreg %ptr, i ; GFX10-NEXT: v_mov_b32_e32 v10, 16 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v13, s6 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v13, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v13, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v13, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v13, v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v11, 0 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v13, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s3 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v13, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v13, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v13, v4, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v5, s3 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v13, v6, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, v7, s5 ; GFX10-NEXT: global_store_dwordx4 v[8:9], v[0:3], off ; GFX10-NEXT: global_store_dwordx4 v[10:11], v[4:7], off ; GFX10-NEXT: s_endpgm @@ -3805,11 +3815,11 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(ptr addrspace(4) inreg %ptr, i ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 2, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 3, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 4, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s3, 5, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s4, 6, v12 +; 
GFX11-NEXT: v_cmp_ne_u32_e64 s0, 2, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 3, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s2, 4, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s3, 5, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s4, 6, v12 ; GFX11-NEXT: v_cmp_eq_u32_e64 s6, 0, v12 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -3817,19 +3827,19 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(ptr addrspace(4) inreg %ptr, i ; GFX11-NEXT: v_lshlrev_b32_e64 v8, v0, s5 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, s9 -; GFX11-NEXT: v_cmp_eq_u32_e64 s5, 7, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s5, 7, v12 ; GFX11-NEXT: v_not_b32_e32 v9, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v1, s10, v1, s0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s11, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, s11, v1, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v1, s12, v1, s2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s13, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s14, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v1, s13, v1, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v1, s14, v1, s4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v10, v1, s15, s5 +; GFX11-NEXT: v_cndmask_b32_e64 v10, s15, v1, s5 ; GFX11-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9 ; GFX11-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s11 ; GFX11-NEXT: v_and_or_b32 v13, v10, v9, v8 @@ -3839,13 +3849,13 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(ptr addrspace(4) inreg %ptr, i ; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 16 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v13, s6 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v13, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v13, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v2, v13, v2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v13, v3, s1 ; GFX11-NEXT: v_mov_b32_e32 v11, 0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v13, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v13, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v13, s4 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v13, s5 +; GFX11-NEXT: v_cndmask_b32_e64 v4, v13, v4, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v5, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v6, v13, v6, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, v7, s5 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off ; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off @@ -4042,14 +4052,14 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(ptr addrspace(4) inreg %ptr, i ; GFX10-NEXT: v_lshrrev_b32_e32 v12, 1, v1 ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 2, v12 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 2, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 3, v12 ; GFX10-NEXT: s_mov_b32 null, 0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 4, v12 -; GFX10-NEXT: v_cmp_eq_u32_e64 s3, 5, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s2, 4, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s3, 5, v12 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 6, v12 -; 
GFX10-NEXT: v_cmp_eq_u32_e64 s5, 7, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 6, v12 +; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 7, v12 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v12 ; GFX10-NEXT: v_lshlrev_b32_e64 v3, v1, 0xffff ; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 @@ -4057,12 +4067,12 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(ptr addrspace(4) inreg %ptr, i ; GFX10-NEXT: v_mov_b32_e32 v2, s9 ; GFX10-NEXT: v_not_b32_e32 v9, v3 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s8, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s10, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s12, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, s3 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s14, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v10, v2, s15, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v2, s10, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v2, s11, v2, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, s12, v2, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v2, s13, v2, s3 +; GFX10-NEXT: v_cndmask_b32_e64 v2, s14, v2, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v10, s15, v2, s5 ; GFX10-NEXT: v_mov_b32_e32 v0, s8 ; GFX10-NEXT: v_mov_b32_e32 v1, s9 ; GFX10-NEXT: v_mov_b32_e32 v2, s10 @@ -4077,13 +4087,13 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(ptr addrspace(4) inreg %ptr, i ; GFX10-NEXT: v_mov_b32_e32 v10, 16 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v13, s6 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v13, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v13, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v13, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v13, v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v11, 0 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v13, s2 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s3 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v13, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v13, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v13, v4, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v5, v13, v5, s3 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v13, v6, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, v7, s5 ; GFX10-NEXT: global_store_dwordx4 v[8:9], v[0:3], off ; GFX10-NEXT: global_store_dwordx4 v[10:11], v[4:7], off ; GFX10-NEXT: s_endpgm @@ -4096,12 +4106,12 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(ptr addrspace(4) inreg %ptr, i ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 2, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 3, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 4, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s3, 5, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s4, 6, v12 -; GFX11-NEXT: v_cmp_eq_u32_e64 s5, 7, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 2, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 3, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s2, 4, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s3, 5, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s4, 6, v12 +; GFX11-NEXT: v_cmp_ne_u32_e64 s5, 7, v12 ; GFX11-NEXT: v_cmp_eq_u32_e64 s6, 0, v12 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3) @@ -4112,15 +4122,15 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(ptr addrspace(4) inreg %ptr, i ; GFX11-NEXT: v_not_b32_e32 v9, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cndmask_b32_e32 v2, s8, v2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s10, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v2, s10, v2, s0 ; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s12, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v2, s11, v2, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v2, s12, v2, s2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s14, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v2, s13, v2, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v2, s14, v2, s4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v10, v2, s15, s5 +; GFX11-NEXT: v_cndmask_b32_e64 v10, s15, v2, s5 ; GFX11-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v5, s13 ; GFX11-NEXT: v_dual_mov_b32 v1, s9 :: v_dual_mov_b32 v2, s10 ; GFX11-NEXT: v_mov_b32_e32 v7, s15 @@ -4131,14 +4141,19 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(ptr addrspace(4) inreg %ptr, i ; GFX11-NEXT: v_mov_b32_e32 v8, 0 ; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 16 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v13, s6 +<<<<<<< HEAD ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v13, s0 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v13, s1 +======= +; GFX11-NEXT: v_cndmask_b32_e64 v2, v13, v2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v13, v3, s1 +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX11-NEXT: v_mov_b32_e32 v11, 0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v13, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v13, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v13, s4 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v13, s5 +; GFX11-NEXT: v_cndmask_b32_e64 v4, v13, v4, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v5, v13, v5, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v6, v13, v6, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, v7, s5 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off ; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll index 1701a9cc7f09b..18a1a5544c722 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll @@ -382,15 +382,15 @@ define amdgpu_ps void @insertelement_s_v2i8_s_v(ptr addrspace(4) inreg %ptr, i8 ; GFX10-LABEL: insertelement_s_v2i8_s_v: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 1, v0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0xff ; GFX10-NEXT: global_load_ushort v1, v1, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s4, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-NEXT: v_and_b32_sdwa v2, v2, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_cndmask_b32_e64 v4, v1, s4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v4, s4, v1, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -399,15 +399,15 @@ define amdgpu_ps void @insertelement_s_v2i8_s_v(ptr addrspace(4) inreg %ptr, i8 ; ; GFX11-LABEL: insertelement_s_v2i8_s_v: ; GFX11: ; %bb.0: +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: v_mov_b32_e32 
v1, 0 -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11-NEXT: global_load_u16 v1, v1, s[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s4, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, s4, v1, vcc_lo ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v0 @@ -589,14 +589,14 @@ define amdgpu_ps void @insertelement_v_v2i8_s_v(ptr addrspace(1) %ptr, i8 inreg ; GFX10-LABEL: insertelement_v_v2i8_s_v: ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_ushort v0, v[0:1], off -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 1, v2 ; GFX10-NEXT: v_mov_b32_e32 v3, 0xff ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX10-NEXT: v_and_b32_sdwa v3, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_cndmask_b32_e64 v2, v0, s2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s2, v0, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -606,14 +606,14 @@ define amdgpu_ps void @insertelement_v_v2i8_s_v(ptr addrspace(1) %ptr, i8 inreg ; GFX11-LABEL: insertelement_v_v2i8_s_v: ; GFX11: ; %bb.0: ; GFX11-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 1, v2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v1 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v0 @@ -3198,17 +3198,17 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(ptr addrspace(4) inreg %ptr, i8 ; GFX10-NEXT: s_and_b32 s1, s4, 0xff ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 2, v6 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 2, v6 ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v6 ; GFX10-NEXT: v_lshlrev_b32_e64 v2, v1, 0xff ; GFX10-NEXT: v_lshlrev_b32_e64 v4, v1, s1 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v6 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 3, v6 ; GFX10-NEXT: v_not_b32_e32 v5, v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s9 ; GFX10-NEXT: v_cndmask_b32_e32 v0, s8, v0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s10, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v0, s11, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v0, s10, v0, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 
s11, v0, s1 ; GFX10-NEXT: v_mov_b32_e32 v0, s8 ; GFX10-NEXT: v_mov_b32_e32 v1, s9 ; GFX10-NEXT: v_mov_b32_e32 v2, s10 @@ -3218,8 +3218,8 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(ptr addrspace(4) inreg %ptr, i8 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v7, s2 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v7, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s1 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm ; @@ -3231,7 +3231,7 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(ptr addrspace(4) inreg %ptr, i8 ; GFX11-NEXT: s_and_b32 s1, s4, 0xff ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 2, v6 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 2, v6 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v6 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s9 :: v_dual_lshlrev_b32 v1, 3, v1 @@ -3239,11 +3239,11 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(ptr addrspace(4) inreg %ptr, i8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_lshlrev_b32_e64 v2, v1, 0xff ; GFX11-NEXT: v_lshlrev_b32_e64 v4, v1, s1 -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 3, v6 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s10, s0 +; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 3, v6 +; GFX11-NEXT: v_cndmask_b32_e64 v0, s10, v0, s0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_not_b32_e32 v5, v2 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v0, s11, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v7, s11, v0, s1 ; GFX11-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9 ; GFX11-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s11 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) @@ -3251,9 +3251,14 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(ptr addrspace(4) inreg %ptr, i8 ; GFX11-NEXT: v_mov_b32_e32 v4, 0 ; GFX11-NEXT: v_mov_b32_e32 v5, 0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v7, s2 +<<<<<<< HEAD ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1 +======= +; GFX11-NEXT: v_cndmask_b32_e64 v2, v7, v2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s1 +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_endpgm %vec = load <16 x i8>, ptr addrspace(4) %ptr @@ -3381,8 +3386,8 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(ptr addrspace(4) inreg %ptr, i8 ; GFX10-NEXT: v_and_b32_e32 v2, 3, v1 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 3, v2 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 2, v6 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v6 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 2, v6 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 3, v6 ; GFX10-NEXT: s_mov_b32 null, 0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v6 ; GFX10-NEXT: v_lshlrev_b32_e64 v3, v2, 0xff @@ -3391,8 +3396,8 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(ptr addrspace(4) inreg %ptr, i8 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v1, s7, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, s6, v1, s0 
+; GFX10-NEXT: v_cndmask_b32_e64 v7, s7, v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-NEXT: v_mov_b32_e32 v2, s6 @@ -3402,8 +3407,8 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(ptr addrspace(4) inreg %ptr, i8 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v7, s2 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, v7, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v3, s1 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm ; @@ -3415,8 +3420,8 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(ptr addrspace(4) inreg %ptr, i8 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 2, v6 -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 3, v6 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 2, v6 +; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 3, v6 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v6 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_lshlrev_b32 v2, 3, v2 @@ -3425,10 +3430,10 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(ptr addrspace(4) inreg %ptr, i8 ; GFX11-NEXT: v_lshlrev_b32_e64 v3, v2, 0xff ; GFX11-NEXT: v_lshlrev_b32_e32 v4, v2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v1, s6, v1, s0 ; GFX11-NEXT: v_not_b32_e32 v5, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v7, v1, s7, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v7, s7, v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 ; GFX11-NEXT: v_mov_b32_e32 v3, s7 ; GFX11-NEXT: v_and_or_b32 v7, v7, v5, v4 @@ -3437,9 +3442,14 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(ptr addrspace(4) inreg %ptr, i8 ; GFX11-NEXT: v_mov_b32_e32 v5, 0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v7, s2 +<<<<<<< HEAD ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v7, s0 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1 +======= +; GFX11-NEXT: v_cndmask_b32_e64 v2, v7, v2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, v3, s1 +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_endpgm %vec = load <16 x i8>, ptr addrspace(4) %ptr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 8134eb3ca2afc..2c4f9d84cba11 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -451,22 +451,22 @@ define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_v(<8 x float> %vec, fl ; ; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_s_v: ; GFX10PLUS: ; %bb.0: ; %entry -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v3, v3, s2, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 -; 
GFX10PLUS-NEXT: v_cndmask_b32_e64 v4, v4, s2, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v5, v5, s2, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v6, v6, s2, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v7, v7, s2, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 1, v8 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v8 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, s2, v2, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v8 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, s2, v3, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v8 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, s2, v4, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v8 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, s2, v5, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v8 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, s2, v6, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v8 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, s2, v7, vcc_lo ; GFX10PLUS-NEXT: ; return to shader part epilog entry: %insert = insertelement <8 x float> %vec, float %val, i32 %idx @@ -973,7 +973,7 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do ; GFX10-NEXT: v_mov_b32_e32 v16, s15 ; GFX10-NEXT: v_mov_b32_e32 v2, s1 ; GFX10-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-NEXT: v_mov_b32_e32 v15, s14 ; GFX10-NEXT: v_mov_b32_e32 v14, s13 ; GFX10-NEXT: v_mov_b32_e32 v13, s12 @@ -987,29 +987,29 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do ; GFX10-NEXT: v_mov_b32_e32 v5, s4 ; GFX10-NEXT: v_mov_b32_e32 v4, s3 ; GFX10-NEXT: v_mov_b32_e32 v3, s2 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s18, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s19, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s18, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s19, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, s18, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s19, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, s18, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s18, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, s19, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, s18, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, s19, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, s19, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, s18, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, s19, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, s18, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, s19, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 1, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, s18, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s19, v2, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 7, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, s18, v3, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v4, s19, v4, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 3, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v5, s18, v5, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 
v6, s19, v6, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v15, s18, v15, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v7, s18, v7, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v8, s19, v8, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 5, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v9, s18, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v10, s19, v10, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v16, s19, v16, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v11, s18, v11, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v12, s19, v12, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v13, s18, v13, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v14, s19, v14, vcc_lo ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[1:4], off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[5:8], off @@ -1040,36 +1040,36 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do ; GFX11-NEXT: s_mov_b32 s14, s16 ; GFX11-NEXT: v_dual_mov_b32 v16, s15 :: v_dual_mov_b32 v15, s14 ; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_dual_mov_b32 v14, s13 :: v_dual_mov_b32 v13, s12 ; GFX11-NEXT: v_dual_mov_b32 v12, s11 :: v_dual_mov_b32 v11, s10 ; GFX11-NEXT: v_dual_mov_b32 v10, s9 :: v_dual_mov_b32 v9, s8 ; GFX11-NEXT: v_dual_mov_b32 v8, s7 :: v_dual_mov_b32 v7, s6 ; GFX11-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v5, s4 ; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s18, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s19, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s18, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s19, s0 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, s18, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s19, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, s18, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s18, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, s19, s0 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, s18, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, s19, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, s19, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, s18, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, s19, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, s18, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, s19, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 1, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v1, s18, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s19, v2, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 7, v0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, s18, v3, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, s19, v4, s0 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 3, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v5, s18, v5, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v6, s19, v6, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX11-NEXT: v_cndmask_b32_e64 v15, s18, v15, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v7, s18, v7, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v8, s19, v8, s0 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 5, v0 +; GFX11-NEXT: v_cndmask_b32_e32 v9, s18, v9, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v10, s19, v10, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 
vcc_lo, 6, v0 +; GFX11-NEXT: v_cndmask_b32_e64 v16, s19, v16, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v11, s18, v11, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v12, s19, v12, s0 +; GFX11-NEXT: v_cndmask_b32_e32 v13, s18, v13, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v14, s19, v14, vcc_lo ; GFX11-NEXT: global_store_b128 v[0:1], v[1:4], off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_store_b128 v[0:1], v[5:8], off dlc @@ -1546,30 +1546,30 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double i ; ; GFX10-LABEL: dyn_insertelement_v8f64_v_s_v: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v16 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s3, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, s2, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, s3, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s3, s0 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, s3, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, s3, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 -; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, s2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, s3, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, s2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, s3, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v16 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 1, v16 +; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 7, v16 +; GFX10-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v16 +; GFX10-NEXT: v_cndmask_b32_e64 v2, s2, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, s3, v3, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 3, v16 +; GFX10-NEXT: v_cndmask_b32_e64 v14, s2, v14, s1 +; GFX10-NEXT: v_cndmask_b32_e32 v4, s2, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v5, s3, v5, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v16 +; GFX10-NEXT: v_cndmask_b32_e64 v6, s2, v6, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v7, s3, v7, s0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 5, v16 +; GFX10-NEXT: v_cndmask_b32_e64 v15, s3, v15, s1 +; GFX10-NEXT: v_cndmask_b32_e32 v8, s2, v8, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v9, s3, v9, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v16 +; GFX10-NEXT: v_cndmask_b32_e64 v10, s2, v10, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v11, s3, v11, s0 +; GFX10-NEXT: v_cndmask_b32_e32 v12, s2, v12, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v13, s3, v13, vcc_lo ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off @@ -1582,30 +1582,30 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double i ; ; GFX11-LABEL: dyn_insertelement_v8f64_v_s_v: ; GFX11: ; %bb.0: ; %entry -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v16 -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v16 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 
vcc_lo, 2, v16 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s2, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s3, s0 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v16 -; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, s2, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, s3, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 -; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s2, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s3, s0 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v16 -; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, s3, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, s3, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 -; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, s2, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, s3, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, s2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, s3, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v16 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 1, v16 +; GFX11-NEXT: v_cmp_ne_u32_e64 s1, 7, v16 +; GFX11-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v16 +; GFX11-NEXT: v_cndmask_b32_e64 v2, s2, v2, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, s3, v3, s0 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 3, v16 +; GFX11-NEXT: v_cndmask_b32_e64 v14, s2, v14, s1 +; GFX11-NEXT: v_cndmask_b32_e32 v4, s2, v4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v5, s3, v5, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v16 +; GFX11-NEXT: v_cndmask_b32_e64 v6, s2, v6, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v7, s3, v7, s0 +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 5, v16 +; GFX11-NEXT: v_cndmask_b32_e64 v15, s3, v15, s1 +; GFX11-NEXT: v_cndmask_b32_e32 v8, s2, v8, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v9, s3, v9, vcc_lo +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v16 +; GFX11-NEXT: v_cndmask_b32_e64 v10, s2, v10, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v11, s3, v11, s0 +; GFX11-NEXT: v_cndmask_b32_e32 v12, s2, v12, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v13, s3, v13, vcc_lo ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll index 832f066adaa84..50148f02fa52d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -5254,12 +5254,12 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) { ; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s8 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, s0, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, s1, v3, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10-NEXT: v_readfirstlane_b32 s1, v2 ; GFX10-NEXT: v_readfirstlane_b32 s2, v0 @@ -5294,12 +5294,12 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) { ; GFX11-NEXT: s_add_i32 s1, s0, 0x80000000 ; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX11-NEXT: 
v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 1, v0 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_mov_b32_e32 v0, s8 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, s0, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, s0, v0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v3, s1, v3, vcc_lo ; GFX11-NEXT: v_readfirstlane_b32 s0, v1 ; GFX11-NEXT: v_readfirstlane_b32 s1, v2 ; GFX11-NEXT: v_readfirstlane_b32 s2, v0 @@ -5327,9 +5327,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) { ; GFX6-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc ; GFX6-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[2:3] ; GFX6-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] +; GFX6-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v5 -; GFX6-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v7, vcc ; GFX6-NEXT: v_xor_b32_e32 v2, v2, v6 ; GFX6-NEXT: v_bfrev_b32_e32 v6, 1 ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v3, v6 @@ -5358,9 +5358,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) { ; GFX8-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc ; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[2:3] ; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] +; GFX8-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v5 -; GFX8-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v7, vcc ; GFX8-NEXT: v_xor_b32_e32 v2, v2, v6 ; GFX8-NEXT: v_bfrev_b32_e32 v6, 1 ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v3, v6 @@ -5389,9 +5389,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) { ; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[2:3] ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] +; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] ; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v7, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v7, vcc ; GFX9-NEXT: v_xor_b32_e32 v2, v2, v6 ; GFX9-NEXT: v_and_b32_e32 v2, 1, v2 ; GFX9-NEXT: v_add_u32_e32 v6, 0x80000000, v3 @@ -5416,9 +5416,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) { ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5] ; GFX10-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc_lo -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[2:3] +; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3] ; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v5 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc_lo ; GFX10-NEXT: v_xor_b32_e32 v2, v2, v6 ; GFX10-NEXT: v_add_nc_u32_e32 v6, 0x80000000, v3 ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2 @@ -5443,9 +5443,9 @@ define amdgpu_ps <4 x float> @saddsat_i128_sv(i128 inreg %lhs, i128 %rhs) { ; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5] ; GFX11-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc_lo -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[2:3] +; GFX11-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3] ; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v5 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc_lo +; GFX11-NEXT: 
v_cndmask_b32_e32 v2, 0, v8, vcc_lo ; GFX11-NEXT: v_xor_b32_e32 v2, v2, v6 ; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x80000000, v3 ; GFX11-NEXT: v_and_b32_e32 v2, 1, v2 @@ -5613,7 +5613,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) { ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v3, v7, v3 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc_lo +; GFX11-NEXT: v_dual_cndmask_b32 v2, v6, v2 :: v_dual_cndmask_b32 v3, v7, v3 ; GFX11-NEXT: ; return to shader part epilog %result = call i128 @llvm.sadd.sat.i128(i128 %lhs, i128 %rhs) %cast = bitcast i128 %result to <4 x float> @@ -5637,8 +5638,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX6-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[10:11] ; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] -; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX6-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX6-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v1 @@ -5661,8 +5662,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX6-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX6-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX6-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] +; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc ; GFX6-NEXT: v_xor_b32_e32 v4, v5, v4 ; GFX6-NEXT: v_add_i32_e32 v7, vcc, 0x80000000, v6 ; GFX6-NEXT: v_and_b32_e32 v4, 1, v4 @@ -5689,8 +5690,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[10:11] ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX8-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1 @@ -5713,8 +5714,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX8-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] +; GFX8-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc ; GFX8-NEXT: v_xor_b32_e32 v4, v5, v4 ; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x80000000, v6 ; GFX8-NEXT: v_and_b32_e32 v4, 1, v4 @@ -5742,8 +5743,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[10:11] ; GFX9-NEXT: v_add_u32_e32 v3, 0x80000000, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 @@ -5765,8 +5766,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> 
%lhs, <2 x i128> %rhs) { ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX9-NEXT: v_add_u32_e32 v7, 0x80000000, v6 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] +; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc ; GFX9-NEXT: v_xor_b32_e32 v4, v5, v4 ; GFX9-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 @@ -5795,8 +5796,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX10-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, v5, v13, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, v6, v14, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v19, vcc_lo, v7, v15, vcc_lo -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[10:11] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc_lo +; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[10:11] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc_lo ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[12:13], v[4:5] ; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo @@ -5808,9 +5809,9 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[6:7] ; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v19 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc_lo -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[14:15] +; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[14:15] ; GFX10-NEXT: v_add_nc_u32_e32 v7, 0x80000000, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-NEXT: v_xor_b32_e32 v1, v2, v1 ; GFX10-NEXT: v_ashrrev_i32_e32 v2, 31, v17 @@ -5846,8 +5847,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX11-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, v5, v13, vcc_lo ; GFX11-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, v6, v14, vcc_lo ; GFX11-NEXT: v_add_co_ci_u32_e64 v19, null, v7, v15, vcc_lo -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[10:11] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc_lo +; GFX11-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[10:11] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc_lo ; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[12:13], v[4:5] ; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo @@ -5858,9 +5859,8 @@ define <2 x i128> @v_saddsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[6:7] ; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v19 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc_lo -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[14:15] -; GFX11-NEXT: v_add_nc_u32_e32 v7, 0x80000000, v6 -; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo +; GFX11-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[14:15] +; GFX11-NEXT: v_dual_cndmask_b32 v2, 0, v4 :: v_dual_add_nc_u32 v7, 0x80000000, v6 ; GFX11-NEXT: v_xor_b32_e32 v1, v2, v1 ; GFX11-NEXT: v_ashrrev_i32_e32 v2, 31, v17 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 @@ -6176,7 +6176,6 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128> ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, s[2:3], s[6:7] ; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[14:15], 0 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX10-NEXT: v_mov_b32_e32 v6, s2 ; GFX10-NEXT: v_mov_b32_e32 v7, s3 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 ; GFX10-NEXT: s_and_b32 s4, 1, s12 @@ -6187,20 +6186,17 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128> ; GFX10-NEXT: s_and_b32 s5, 1, s5 ; 
GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s5 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX10-NEXT: v_mov_b32_e32 v0, s9 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4 -; GFX10-NEXT: v_mov_b32_e32 v3, s8 -; GFX10-NEXT: s_ashr_i32 s4, s3, 31 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s11, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s10, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: v_mov_b32_e32 v0, s16 +<<<<<<< HEAD ; GFX10-NEXT: v_xor_b32_e32 v1, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v2, s16 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s10, vcc_lo -; GFX10-NEXT: s_add_i32 s0, s4, 0x80000000 -; GFX10-NEXT: v_readfirstlane_b32 s3, v4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s10, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s10, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo +; GFX10-NEXT: v_readfirstlane_b32 s1, v4 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_readfirstlane_b32 s1, v0 @@ -6209,6 +6205,28 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128> ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s0, vcc_lo +======= +; GFX10-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4 +; GFX10-NEXT: v_mov_b32_e32 v3, s8 +; GFX10-NEXT: s_ashr_i32 s4, s3, 31 +; GFX10-NEXT: v_cndmask_b32_e32 v4, s10, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, s10, v0, vcc_lo +; GFX10-NEXT: v_xor_b32_e32 v1, v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v2, s17 +; GFX10-NEXT: v_cndmask_b32_e32 v3, s10, v3, vcc_lo +; GFX10-NEXT: s_add_i32 s0, s4, 0x80000000 +; GFX10-NEXT: v_readfirstlane_b32 s1, v4 +; GFX10-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v2, s11, v2, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, s2 +; GFX10-NEXT: v_readfirstlane_b32 s2, v0 +; GFX10-NEXT: v_readfirstlane_b32 s3, v2 +; GFX10-NEXT: v_cndmask_b32_e32 v5, s4, v5, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v6, s4, v6, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v7, s0, v7, vcc_lo +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX10-NEXT: v_readfirstlane_b32 s0, v3 ; GFX10-NEXT: v_readfirstlane_b32 s4, v5 ; GFX10-NEXT: v_readfirstlane_b32 s5, v1 @@ -6247,13 +6265,9 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128> ; GFX11-NEXT: s_addc_u32 s3, s7, s15 ; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_cmp_eq_u64 s[2:3], s[6:7] -; GFX11-NEXT: v_mov_b32_e32 v4, s17 -; GFX11-NEXT: s_cselect_b32 s12, 1, 0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 ; GFX11-NEXT: v_cmp_lt_i64_e64 s4, s[2:3], s[6:7] ; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[14:15], 0 -; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 ; GFX11-NEXT: v_mov_b32_e32 v5, s0 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 ; GFX11-NEXT: s_and_b32 s4, 1, s12 @@ -6266,18 +6280,17 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128> ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4 ; GFX11-NEXT: v_mov_b32_e32 v3, s8 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NEXT: s_ashr_i32 s4, s3, 31 +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: v_mov_b32_e32 v0, s16 ; GFX11-NEXT: v_xor_b32_e32 
v1, v2, v1 +<<<<<<< HEAD ; GFX11-NEXT: v_mov_b32_e32 v0, s9 ; GFX11-NEXT: v_mov_b32_e32 v2, s16 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s10, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s11, vcc_lo ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s10, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s10, vcc_lo -; GFX11-NEXT: s_add_i32 s0, s4, 0x80000000 -; GFX11-NEXT: v_readfirstlane_b32 s3, v4 +; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-NEXT: v_readfirstlane_b32 s1, v0 @@ -6286,6 +6299,24 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128> ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s4, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s0, vcc_lo +======= +; GFX11-NEXT: v_mov_b32_e32 v4, s9 +; GFX11-NEXT: v_mov_b32_e32 v2, s17 +; GFX11-NEXT: v_cndmask_b32_e32 v3, s10, v3, vcc_lo +; GFX11-NEXT: v_dual_cndmask_b32 v0, s10, v0 :: v_dual_and_b32 v1, 1, v1 +; GFX11-NEXT: v_cndmask_b32_e32 v4, s10, v4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v2, s11, v2, vcc_lo +; GFX11-NEXT: s_add_i32 s0, s4, 0x80000000 +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-NEXT: v_mov_b32_e32 v1, s2 +; GFX11-NEXT: v_readfirstlane_b32 s1, v4 +; GFX11-NEXT: v_readfirstlane_b32 s2, v0 +; GFX11-NEXT: v_readfirstlane_b32 s3, v2 +; GFX11-NEXT: v_cndmask_b32_e32 v5, s4, v5, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v6, s4, v6, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v7, s0, v7, vcc_lo +>>>>>>> 51c61e76cef3 (added sgpr case, refactoring) ; GFX11-NEXT: v_readfirstlane_b32 s0, v3 ; GFX11-NEXT: v_readfirstlane_b32 s4, v5 ; GFX11-NEXT: v_readfirstlane_b32 s5, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll index ee3bf96111994..22f2c47c0fddf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll @@ -18,8 +18,8 @@ define float @test_s32(float %a) #0 { ; GCN-LABEL: test_s32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %fcmp = fcmp olt float %a, 0.0 @@ -31,9 +31,9 @@ define double @test_s64(double %a) #0 { ; GCN-LABEL: test_s64: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_gt_f64_e32 vcc, 0, v[0:1] -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GCN-NEXT: v_cmp_ngt_f64_e32 vcc, 0, v[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %fcmp = fcmp olt double %a, 0.0 @@ -111,10 +111,10 @@ define <2 x float> @test_v2s32(<2 x float> %a) #0 { ; GCN-LABEL: test_v2s32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1 -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 0, 
v1 +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %fcmp = fcmp olt <2 x float> %a, zeroinitializer @@ -126,14 +126,14 @@ define <4 x float> @test_v4s32(<4 x float> %a) #0 { ; GCN-LABEL: test_v4s32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1 -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v2 -; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v3 -; GCN-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1 +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v2 +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v3 +; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %fcmp = fcmp olt <4 x float> %a, zeroinitializer @@ -145,12 +145,12 @@ define <2 x double> @test_v2s64(<2 x double> %a) #0 { ; GCN-LABEL: test_v2s64: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_gt_f64_e32 vcc, 0, v[0:1] -; GCN-NEXT: v_cmp_gt_f64_e64 s[4:5], 0, v[2:3] -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[4:5] +; GCN-NEXT: v_cmp_ngt_f64_e32 vcc, 0, v[0:1] +; GCN-NEXT: v_cmp_ngt_f64_e64 s[4:5], 0, v[2:3] +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GCN-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[4:5] ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %fcmp = fcmp olt <2 x double> %a, zeroinitializer diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll index d9158e3558395..536504747c971 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -2835,9 +2835,9 @@ define i48 @v_uaddsat_i48(i48 %lhs, i48 %rhs) { ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_uaddsat_i48: @@ -2944,10 +2944,10 @@ define amdgpu_ps <2 x float> @uaddsat_i48_sv(i48 inreg %lhs, i48 %rhs) { ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX6-NEXT: ; return to shader part epilog ; @@ -3003,10 +3003,10 @@ define amdgpu_ps <2 x float> @uaddsat_i48_vs(i48 %lhs, i48 inreg %rhs) { ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: 
v_lshlrev_b32_e32 v3, 16, v3 -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX6-NEXT: ; return to shader part epilog ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll index 1fd139b06417f..1944d1577ae29 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -2705,9 +2705,9 @@ define i48 @v_usubsat_i48(i48 %lhs, i48 %rhs) { ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_usubsat_i48: @@ -2815,9 +2815,9 @@ define amdgpu_ps <2 x float> @usubsat_i48_sv(i48 inreg %lhs, i48 %rhs) { ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: usubsat_i48_sv: @@ -2873,9 +2873,9 @@ define amdgpu_ps <2 x float> @usubsat_i48_vs(i48 %lhs, i48 inreg %rhs) { ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: usubsat_i48_vs: diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll index e71bf15384727..be4cf539ab38c 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll @@ -213,10 +213,10 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) { ; GCN-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dword s4, s[4:5], 0x0 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s4 -; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 5, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, 5, v1, vcc ; GCN-NEXT: v_sub_u32_e32 v1, vcc, 0, v0 ; GCN-NEXT: v_max_i32_e32 v1, v0, v1 ; GCN-NEXT: v_cvt_f32_u32_e32 v2, v1 diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index 8e0b3cb9aa1d5..c12e8f136f3b6 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ 
b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -11062,9 +11062,9 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc ; GFX1064-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc -; GFX1064-NEXT: v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1] -; GFX1064-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc -; GFX1064-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc +; GFX1064-NEXT: v_cmp_le_i64_e32 vcc, s[2:3], v[0:1] +; GFX1064-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc +; GFX1064-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -11093,9 +11093,9 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo ; GFX1032-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc_lo -; GFX1032-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1032-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX1032-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX1032-NEXT: v_cmp_le_i64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1032-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX1032-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -11126,9 +11126,9 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX1164-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc ; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164-NEXT: v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1] -; GFX1164-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc -; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc +; GFX1164-NEXT: v_cmp_le_i64_e32 vcc, s[2:3], v[0:1] +; GFX1164-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc +; GFX1164-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc ; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164-NEXT: s_mov_b32 s2, -1 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -11157,9 +11157,9 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX1132-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo ; GFX1132-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1132-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX1132-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX1132-NEXT: v_cmp_le_i64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1132-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX1132-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132-NEXT: s_mov_b32 s2, -1 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -11383,9 +11383,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) { ; GFX1064_ITERATIVE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v4 ; GFX1064_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v3 -; GFX1064_ITERATIVE-NEXT: v_cmp_gt_i64_e32 vcc, s[2:3], v[1:2] -; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc -; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc +; GFX1064_ITERATIVE-NEXT: v_cmp_le_i64_e32 vcc, s[2:3], v[1:2] +; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc +; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc ; GFX1064_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1064_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ 
-11434,9 +11434,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) { ; GFX1032_ITERATIVE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v4 ; GFX1032_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v3 -; GFX1032_ITERATIVE-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[1:2] -; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo +; GFX1032_ITERATIVE-NEXT: v_cmp_le_i64_e32 vcc_lo, s[2:3], v[1:2] +; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo +; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX1032_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1032_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -11492,9 +11492,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) { ; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v2 ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164_ITERATIVE-NEXT: v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1] -; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc -; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc +; GFX1164_ITERATIVE-NEXT: v_cmp_le_i64_e32 vcc, s[2:3], v[0:1] +; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc +; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc ; GFX1164_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1164_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -11546,9 +11546,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) { ; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v2 ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132_ITERATIVE-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX1132_ITERATIVE-NEXT: v_cmp_le_i64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX1132_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1132_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -11848,9 +11848,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) { ; GFX1064_DPP-NEXT: v_mov_b32_e32 v7, v4 ; GFX1064_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1064_DPP-NEXT: s_mov_b32 s3, 0x31016000 -; GFX1064_DPP-NEXT: v_cmp_gt_i64_e32 vcc, s[4:5], v[7:8] -; GFX1064_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc -; GFX1064_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc +; GFX1064_DPP-NEXT: v_cmp_le_i64_e32 vcc, s[4:5], v[7:8] +; GFX1064_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc +; GFX1064_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc ; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064_DPP-NEXT: buffer_store_dwordx2 v[7:8], off, s[0:3], 0 ; GFX1064_DPP-NEXT: s_endpgm @@ -11933,9 +11933,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_mov_b32_e32 v7, v4 ; GFX1032_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1032_DPP-NEXT: s_mov_b32 s3, 0x31016000 -; GFX1032_DPP-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[7:8] -; GFX1032_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc_lo -; GFX1032_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc_lo +; GFX1032_DPP-NEXT: v_cmp_le_i64_e32 vcc_lo, s[4:5], v[7:8] +; GFX1032_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc_lo +; 
GFX1032_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc_lo ; GFX1032_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032_DPP-NEXT: buffer_store_dwordx2 v[7:8], off, s[0:3], 0 ; GFX1032_DPP-NEXT: s_endpgm @@ -12055,9 +12055,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1164_DPP-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, s[4:5], v[7:8] -; GFX1164_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc -; GFX1164_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc +; GFX1164_DPP-NEXT: v_cmp_le_i64_e32 vcc, s[4:5], v[7:8] +; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc +; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc ; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164_DPP-NEXT: buffer_store_b64 v[7:8], off, s[0:3], 0 ; GFX1164_DPP-NEXT: s_endpgm @@ -12145,9 +12145,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1132_DPP-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132_DPP-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[7:8] -; GFX1132_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc_lo -; GFX1132_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc_lo +; GFX1132_DPP-NEXT: v_cmp_le_i64_e32 vcc_lo, s[4:5], v[7:8] +; GFX1132_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc_lo +; GFX1132_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc_lo ; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132_DPP-NEXT: buffer_store_b64 v[7:8], off, s[0:3], 0 ; GFX1132_DPP-NEXT: s_endpgm @@ -12888,9 +12888,9 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc ; GFX1064-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc -; GFX1064-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1] -; GFX1064-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc -; GFX1064-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc +; GFX1064-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1] +; GFX1064-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc +; GFX1064-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -12919,9 +12919,9 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc_lo ; GFX1032-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc_lo -; GFX1032-NEXT: v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1032-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX1032-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX1032-NEXT: v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1032-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX1032-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -12952,9 +12952,9 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX1164-NEXT: v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc ; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1] -; GFX1164-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc -; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc +; GFX1164-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1] +; GFX1164-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc +; GFX1164-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc ; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 ; 
GFX1164-NEXT: s_mov_b32 s2, -1 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -12983,9 +12983,9 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX1132-NEXT: v_cndmask_b32_e64 v1, 0, 0x7fffffff, vcc_lo ; GFX1132-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132-NEXT: v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1132-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX1132-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX1132-NEXT: v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1132-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX1132-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132-NEXT: s_mov_b32 s2, -1 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -13209,9 +13209,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) { ; GFX1064_ITERATIVE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v4 ; GFX1064_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v3 -; GFX1064_ITERATIVE-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[1:2] -; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc -; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc +; GFX1064_ITERATIVE-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[1:2] +; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc +; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc ; GFX1064_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1064_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -13260,9 +13260,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) { ; GFX1032_ITERATIVE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v4 ; GFX1032_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v3 -; GFX1032_ITERATIVE-NEXT: v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[1:2] -; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo +; GFX1032_ITERATIVE-NEXT: v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[1:2] +; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo +; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX1032_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1032_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -13318,9 +13318,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) { ; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v2 ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164_ITERATIVE-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1] -; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc -; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc +; GFX1164_ITERATIVE-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1] +; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc +; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc ; GFX1164_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1164_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -13372,9 +13372,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) { ; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v2 ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132_ITERATIVE-NEXT: v_cmp_lt_i64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; 
GFX1132_ITERATIVE-NEXT: v_cmp_ge_i64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX1132_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1132_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -13674,9 +13674,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) { ; GFX1064_DPP-NEXT: v_mov_b32_e32 v7, v4 ; GFX1064_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1064_DPP-NEXT: s_mov_b32 s3, 0x31016000 -; GFX1064_DPP-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[7:8] -; GFX1064_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc -; GFX1064_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc +; GFX1064_DPP-NEXT: v_cmp_ge_i64_e32 vcc, s[4:5], v[7:8] +; GFX1064_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc +; GFX1064_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc ; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064_DPP-NEXT: buffer_store_dwordx2 v[7:8], off, s[0:3], 0 ; GFX1064_DPP-NEXT: s_endpgm @@ -13759,9 +13759,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_mov_b32_e32 v7, v4 ; GFX1032_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1032_DPP-NEXT: s_mov_b32 s3, 0x31016000 -; GFX1032_DPP-NEXT: v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[7:8] -; GFX1032_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc_lo -; GFX1032_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc_lo +; GFX1032_DPP-NEXT: v_cmp_ge_i64_e32 vcc_lo, s[4:5], v[7:8] +; GFX1032_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc_lo +; GFX1032_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc_lo ; GFX1032_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032_DPP-NEXT: buffer_store_dwordx2 v[7:8], off, s[0:3], 0 ; GFX1032_DPP-NEXT: s_endpgm @@ -13881,9 +13881,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1164_DPP-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[7:8] -; GFX1164_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc -; GFX1164_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc +; GFX1164_DPP-NEXT: v_cmp_ge_i64_e32 vcc, s[4:5], v[7:8] +; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc +; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc ; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164_DPP-NEXT: buffer_store_b64 v[7:8], off, s[0:3], 0 ; GFX1164_DPP-NEXT: s_endpgm @@ -13971,9 +13971,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1132_DPP-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132_DPP-NEXT: v_cmp_lt_i64_e32 vcc_lo, s[4:5], v[7:8] -; GFX1132_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc_lo -; GFX1132_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc_lo +; GFX1132_DPP-NEXT: v_cmp_ge_i64_e32 vcc_lo, s[4:5], v[7:8] +; GFX1132_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc_lo +; GFX1132_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc_lo ; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132_DPP-NEXT: buffer_store_b64 v[7:8], off, s[0:3], 0 ; GFX1132_DPP-NEXT: s_endpgm @@ -14710,9 +14710,9 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc -; GFX1064-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1] -; GFX1064-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc -; GFX1064-NEXT: v_cndmask_b32_e64 v1, 0, s3, vcc +; GFX1064-NEXT: v_cmp_le_u64_e32 vcc, 
s[2:3], v[0:1] +; GFX1064-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc +; GFX1064-NEXT: v_cndmask_b32_e64 v1, s3, 0, vcc ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -14741,9 +14741,9 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc_lo -; GFX1032-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1032-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo -; GFX1032-NEXT: v_cndmask_b32_e64 v1, 0, s3, vcc_lo +; GFX1032-NEXT: v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1032-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo +; GFX1032-NEXT: v_cndmask_b32_e64 v1, s3, 0, vcc_lo ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -14774,9 +14774,9 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 ; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1] -; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc -; GFX1164-NEXT: v_cndmask_b32_e64 v1, 0, s3, vcc +; GFX1164-NEXT: v_cmp_le_u64_e32 vcc, s[2:3], v[0:1] +; GFX1164-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc +; GFX1164-NEXT: v_cndmask_b32_e64 v1, s3, 0, vcc ; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164-NEXT: s_mov_b32 s2, -1 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -14802,12 +14802,12 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX1132-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132-NEXT: v_readfirstlane_b32 s3, v1 ; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 -; GFX1132-NEXT: v_mov_b32_e32 v1, 0 ; GFX1132-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc_lo +; GFX1132-NEXT: v_mov_b32_e32 v1, 0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1132-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo -; GFX1132-NEXT: v_cndmask_b32_e64 v1, 0, s3, vcc_lo +; GFX1132-NEXT: v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1132-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo +; GFX1132-NEXT: v_cndmask_b32_e64 v1, s3, 0, vcc_lo ; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132-NEXT: s_mov_b32 s2, -1 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -15027,9 +15027,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) { ; GFX1064_ITERATIVE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v4 ; GFX1064_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v3 -; GFX1064_ITERATIVE-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[1:2] -; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc -; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc +; GFX1064_ITERATIVE-NEXT: v_cmp_le_u64_e32 vcc, s[2:3], v[1:2] +; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc +; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc ; GFX1064_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1064_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -15077,9 +15077,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) { ; GFX1032_ITERATIVE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v4 ; GFX1032_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v3 -; GFX1032_ITERATIVE-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[1:2] -; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; 
GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo +; GFX1032_ITERATIVE-NEXT: v_cmp_le_u64_e32 vcc_lo, s[2:3], v[1:2] +; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo +; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX1032_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1032_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -15135,9 +15135,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) { ; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v2 ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164_ITERATIVE-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1] -; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc -; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc +; GFX1164_ITERATIVE-NEXT: v_cmp_le_u64_e32 vcc, s[2:3], v[0:1] +; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc +; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc ; GFX1164_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1164_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -15189,9 +15189,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) { ; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v2 ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132_ITERATIVE-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX1132_ITERATIVE-NEXT: v_cmp_le_u64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX1132_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1132_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -15493,9 +15493,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) { ; GFX1064_DPP-NEXT: v_mov_b32_e32 v7, v4 ; GFX1064_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1064_DPP-NEXT: s_mov_b32 s3, 0x31016000 -; GFX1064_DPP-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[7:8] -; GFX1064_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc -; GFX1064_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc +; GFX1064_DPP-NEXT: v_cmp_le_u64_e32 vcc, s[4:5], v[7:8] +; GFX1064_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc +; GFX1064_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc ; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064_DPP-NEXT: buffer_store_dwordx2 v[7:8], off, s[0:3], 0 ; GFX1064_DPP-NEXT: s_endpgm @@ -15578,9 +15578,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_mov_b32_e32 v7, v4 ; GFX1032_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1032_DPP-NEXT: s_mov_b32 s3, 0x31016000 -; GFX1032_DPP-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[7:8] -; GFX1032_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc_lo -; GFX1032_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc_lo +; GFX1032_DPP-NEXT: v_cmp_le_u64_e32 vcc_lo, s[4:5], v[7:8] +; GFX1032_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc_lo +; GFX1032_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc_lo ; GFX1032_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032_DPP-NEXT: buffer_store_dwordx2 v[7:8], off, s[0:3], 0 ; GFX1032_DPP-NEXT: s_endpgm @@ -15700,9 +15700,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1164_DPP-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164_DPP-NEXT: 
s_delay_alu instid0(VALU_DEP_1) -; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[7:8] -; GFX1164_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc -; GFX1164_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc +; GFX1164_DPP-NEXT: v_cmp_le_u64_e32 vcc, s[4:5], v[7:8] +; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc +; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc ; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164_DPP-NEXT: buffer_store_b64 v[7:8], off, s[0:3], 0 ; GFX1164_DPP-NEXT: s_endpgm @@ -15784,9 +15784,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1132_DPP-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132_DPP-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[4:5], v[7:8] -; GFX1132_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc_lo -; GFX1132_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc_lo +; GFX1132_DPP-NEXT: v_cmp_le_u64_e32 vcc_lo, s[4:5], v[7:8] +; GFX1132_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc_lo +; GFX1132_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc_lo ; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132_DPP-NEXT: buffer_store_b64 v[7:8], off, s[0:3], 0 ; GFX1132_DPP-NEXT: s_endpgm @@ -16524,9 +16524,9 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GFX1064-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc -; GFX1064-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1] -; GFX1064-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc -; GFX1064-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc +; GFX1064-NEXT: v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1] +; GFX1064-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc +; GFX1064-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -16555,9 +16555,9 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo ; GFX1032-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc_lo -; GFX1032-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1032-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX1032-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX1032-NEXT: v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1032-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX1032-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -16588,9 +16588,9 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX1164-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1] -; GFX1164-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc -; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc +; GFX1164-NEXT: v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1] +; GFX1164-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc +; GFX1164-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc ; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164-NEXT: s_mov_b32 s2, -1 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -16619,9 +16619,9 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX1132-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo ; GFX1132-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1132-NEXT: v_cndmask_b32_e64 
v1, v1, s3, vcc_lo -; GFX1132-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX1132-NEXT: v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1132-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX1132-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132-NEXT: s_mov_b32 s2, -1 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -16841,9 +16841,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) { ; GFX1064_ITERATIVE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v4 ; GFX1064_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v3 -; GFX1064_ITERATIVE-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[1:2] -; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc -; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc +; GFX1064_ITERATIVE-NEXT: v_cmp_ge_u64_e32 vcc, s[2:3], v[1:2] +; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc +; GFX1064_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc ; GFX1064_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1064_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -16891,9 +16891,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) { ; GFX1032_ITERATIVE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v4 ; GFX1032_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v3 -; GFX1032_ITERATIVE-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[1:2] -; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e64 v2, v2, s3, vcc_lo -; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo +; GFX1032_ITERATIVE-NEXT: v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[1:2] +; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo +; GFX1032_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo ; GFX1032_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1032_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -16949,9 +16949,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) { ; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1164_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v2 ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164_ITERATIVE-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1] -; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc -; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc +; GFX1164_ITERATIVE-NEXT: v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1] +; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc +; GFX1164_ITERATIVE-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc ; GFX1164_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1164_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -17003,9 +17003,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) { ; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1132_ITERATIVE-NEXT: v_readfirstlane_b32 s2, v2 ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132_ITERATIVE-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[2:3], v[0:1] -; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX1132_ITERATIVE-NEXT: v_cmp_ge_u64_e32 vcc_lo, s[2:3], v[0:1] +; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX1132_ITERATIVE-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX1132_ITERATIVE-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132_ITERATIVE-NEXT: s_mov_b32 s2, -1 ; GFX1132_ITERATIVE-NEXT: s_waitcnt lgkmcnt(0) @@ -17305,9 +17305,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr 
addrspace(1) %out) { ; GFX1064_DPP-NEXT: v_mov_b32_e32 v7, v4 ; GFX1064_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1064_DPP-NEXT: s_mov_b32 s3, 0x31016000 -; GFX1064_DPP-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[7:8] -; GFX1064_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc -; GFX1064_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc +; GFX1064_DPP-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[7:8] +; GFX1064_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc +; GFX1064_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc ; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064_DPP-NEXT: buffer_store_dwordx2 v[7:8], off, s[0:3], 0 ; GFX1064_DPP-NEXT: s_endpgm @@ -17390,9 +17390,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_mov_b32_e32 v7, v4 ; GFX1032_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1032_DPP-NEXT: s_mov_b32 s3, 0x31016000 -; GFX1032_DPP-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[7:8] -; GFX1032_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc_lo -; GFX1032_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc_lo +; GFX1032_DPP-NEXT: v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[7:8] +; GFX1032_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc_lo +; GFX1032_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc_lo ; GFX1032_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032_DPP-NEXT: buffer_store_dwordx2 v[7:8], off, s[0:3], 0 ; GFX1032_DPP-NEXT: s_endpgm @@ -17512,9 +17512,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) { ; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1164_DPP-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[7:8] -; GFX1164_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc -; GFX1164_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc +; GFX1164_DPP-NEXT: v_cmp_ge_u64_e32 vcc, s[4:5], v[7:8] +; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc +; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc ; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164_DPP-NEXT: buffer_store_b64 v[7:8], off, s[0:3], 0 ; GFX1164_DPP-NEXT: s_endpgm @@ -17596,9 +17596,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_mov_b32_e32 v8, v5 ; GFX1132_DPP-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132_DPP-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[4:5], v[7:8] -; GFX1132_DPP-NEXT: v_cndmask_b32_e64 v8, v8, s5, vcc_lo -; GFX1132_DPP-NEXT: v_cndmask_b32_e64 v7, v7, s4, vcc_lo +; GFX1132_DPP-NEXT: v_cmp_ge_u64_e32 vcc_lo, s[4:5], v[7:8] +; GFX1132_DPP-NEXT: v_cndmask_b32_e32 v8, s5, v8, vcc_lo +; GFX1132_DPP-NEXT: v_cndmask_b32_e32 v7, s4, v7, vcc_lo ; GFX1132_DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132_DPP-NEXT: buffer_store_b64 v[7:8], off, s[0:3], 0 ; GFX1132_DPP-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll index 52c90817dddd1..88f254aa144fd 100644 --- a/llvm/test/CodeGen/AMDGPU/ctlz.ll +++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll @@ -1100,9 +1100,9 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: 
s_endpgm @@ -1328,8 +1328,8 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias % ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm ; @@ -1565,10 +1565,10 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias % ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, 0xffe8, v1 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0xffff, v1, vcc_lo ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm @@ -1675,11 +1675,12 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias % ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[2:3] ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: v_ffbh_u32_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 -; GFX10-GISEL-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v1 +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 ; GFX10-GISEL-NEXT: v_min_u32_e32 v2, 32, v2 -; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v2, -16 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, -16, v2 +; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo ; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm ; @@ -1790,10 +1791,10 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out, ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, 0xffe7, v1 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0x7f, v1, vcc_lo ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0 ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll index 9503ffbdb4104..21e9a43a1c265 100644 --- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll +++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll @@ -1543,8 +1543,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(ptr addrspace(1) no ; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-GISEL-NEXT: 
global_store_dword v1, v0, s[0:1] ; GFX9-GISEL-NEXT: s_endpgm @@ -1810,9 +1810,9 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(ptr addrspa ; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v0 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc ; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: global_store_byte v[0:1], v2, off @@ -1898,8 +1898,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(ptr addrspace(1) noali ; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-GISEL-NEXT: s_endpgm @@ -2067,8 +2067,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(ptr addrspace(1 ; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-GISEL-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll index 7f83fc571bf29..9888b2d23bbd3 100644 --- a/llvm/test/CodeGen/AMDGPU/cttz.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz.ll @@ -951,9 +951,9 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm @@ -1153,8 +1153,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias % ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1558,8 +1558,8 @@ define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out, ; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x80, v0 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; 
GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0x7f, v1, vcc_lo ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0 ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll index 97bcd8b5ee68a..bad55163e2840 100644 --- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ -1168,8 +1168,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ; GFX9-GISEL-NEXT: v_or3_b32 v1, v2, v3, v1 ; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v2, v1 ; GFX9-GISEL-NEXT: v_min_u32_e32 v2, 32, v2 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-GISEL-NEXT: s_endpgm %val = load i32, ptr addrspace(1) %arrayidx, align 1 diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll index a511233af0703..19e4a373cec88 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll @@ -325,8 +325,8 @@ define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x41000000 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc0400000, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xc0400000, v3, vcc_lo ; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -344,9 +344,9 @@ define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x41000000 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc0400000, vcc_lo +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xc0400000, v3, vcc_lo ; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 @@ -360,8 +360,8 @@ define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool. ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v3, 0x5c -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 59, vcc +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 59, v3, vcc ; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -369,8 +369,8 @@ define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool. 
; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, 0x5c -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, 59, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 59, v3, vcc ; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -450,8 +450,8 @@ define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1000000 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x41800000, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v1, 0x41800000, v3, vcc_lo ; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -469,9 +469,9 @@ define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1000000 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x41800000, vcc_lo +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v1, 0x41800000, v3, vcc_lo ; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 @@ -655,8 +655,8 @@ define float @fmul_select_f32_test12_sel_log2val_neg48_pos68(float %x, i32 %bool ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x44 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xffffffd0, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xffffffd0, v3, vcc_lo ; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -674,9 +674,9 @@ define float @fmul_select_f32_test12_sel_log2val_neg48_pos68(float %x, i32 %bool ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x44 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xffffffd0, vcc_lo +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xffffffd0, v3, vcc_lo ; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 @@ -948,8 +948,8 @@ define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7-SDAG: ; %bb.0: ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000 -; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX7-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v3, 2.0, v4, vcc ; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0 ; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -958,9 +958,9 @@ define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7-GISEL: ; %bb.0: ; GFX7-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-GISEL-NEXT: v_mov_b32_e32 v5, 0xbff00000 -; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 2.0, vcc +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v5, 2.0, v5, vcc ; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -968,8 +968,8 @@ define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000 -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, 2.0, v4, vcc ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -978,9 +978,9 @@ define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xbff00000 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 2.0, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v5, 2.0, v5, vcc ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -1172,9 +1172,9 @@ define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.a ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_mov_b32_e32 v9, 0xbff00000 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v4, v6 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v8, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v9, v9, 0x3fe00000, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v9, 0x3fe00000, v9, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo @@ -1199,9 +1199,9 @@ define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.a ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v9, 0xbff00000 :: v_dual_mov_b32 v8, 0 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v4, v6 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v9, v9, 0x3fe00000, vcc_lo +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v9, 0x3fe00000, v9, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] ; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo @@ -1219,8 +1219,8 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7-SDAG: ; %bb.0: ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX7-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v3, -2.0, v4, vcc ; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0 ; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -1229,9 
+1229,9 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7-GISEL: ; %bb.0: ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-GISEL-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -2.0, vcc +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v5, -2.0, v5, vcc ; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -1239,8 +1239,8 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, -2.0, v4, vcc ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -1249,9 +1249,9 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -2.0, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v5, -2.0, v5, vcc ; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -1284,9 +1284,9 @@ define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 ; GFX7-NEXT: v_mov_b32_e32 v4, 0 -; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc ; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -1294,9 +1294,9 @@ define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_bfrev_b32_e32 v5, 1 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1329,9 +1329,9 @@ define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v5, 0x40300000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 ; GFX7-NEXT: v_mov_b32_e32 v4, 0 -; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc ; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -1339,9 +1339,9 @@ define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40300000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: 
v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1423,8 +1423,8 @@ define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bo ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_not_b32_e32 v4, 26 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0x5c, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v2, 0x5c, v4, vcc_lo ; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -1442,9 +1442,9 @@ define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bo ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_not_b32_e32 v4, 26 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0x5c, vcc_lo +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v2, 0x5c, v4, vcc_lo ; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 @@ -1507,8 +1507,8 @@ define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bo ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_not_b32_e32 v4, 32 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0xffffffd6, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v2, 0xffffffd6, v4, vcc_lo ; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -1526,9 +1526,9 @@ define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bo ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_not_b32_e32 v4, 32 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0xffffffd6, vcc_lo +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v2, 0xffffffd6, v4, vcc_lo ; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 @@ -2204,8 +2204,8 @@ define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4200 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc800, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xc800, v3, vcc_lo ; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2219,6 +2219,7 @@ define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX11-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; +<<<<<<< HEAD ; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test6: ; GFX11-SDAG-FAKE16: ; %bb.0: ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
@@ -2248,6 +2249,17 @@ define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, v3, 0xc800, vcc_lo ; GFX11-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +======= +; GFX11-GISEL-LABEL: fmul_select_f16_test6: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4200 +; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xc800, v3, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +>>>>>>> e85470abb9d1 ([AMDGPU] Switch V_CNDMASK operands to shrink it into VOP2) %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half -8.000000e+00, half 3.000000e+00 %ldexp = fmul half %x, %y @@ -2312,8 +2324,8 @@ define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xc400 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x4800, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo ; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2327,6 +2339,7 @@ define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX11-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; +<<<<<<< HEAD ; GFX11-SDAG-FAKE16-LABEL: fmul_select_f16_test7: ; GFX11-SDAG-FAKE16: ; %bb.0: ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2356,6 +2369,17 @@ define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, v3, 0x4800, vcc_lo ; GFX11-GISEL-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +======= +; GFX11-GISEL-LABEL: fmul_select_f16_test7: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xc400 +; GFX11-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +>>>>>>> e85470abb9d1 ([AMDGPU] Switch V_CNDMASK operands to shrink it into VOP2) %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 8.000000e+00, half -4.000000e+00 %ldexp = fmul half %x, %y @@ -3452,8 +3476,8 @@ define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc +; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll index 06c0417211809..e964cc67f8135 100644 --- 
a/llvm/test/CodeGen/AMDGPU/div_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll @@ -46,9 +46,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, 64, v3 ; GFX9-NEXT: v_addc_co_u32_e64 v4, s[6:7], 0, 0, vcc -; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GFX9-NEXT: v_ffbh_u32_e32 v6, v11 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc ; GFX9-NEXT: v_ffbh_u32_e32 v3, v10 ; GFX9-NEXT: v_add_u32_e32 v3, 32, v3 ; GFX9-NEXT: v_min_u32_e32 v3, v3, v6 @@ -56,13 +56,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT: v_add_u32_e32 v6, 32, v6 ; GFX9-NEXT: v_ffbh_u32_e32 v7, v9 ; GFX9-NEXT: v_min_u32_e32 v6, v6, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, 64, v6 ; GFX9-NEXT: v_addc_co_u32_e64 v7, s[6:7], 0, 0, vcc -; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] +; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] ; GFX9-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc ; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3 ; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v7, vcc ; GFX9-NEXT: v_subb_co_u32_e32 v4, vcc, 0, v5, vcc @@ -1287,11 +1287,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-NEXT: v_xor_b32_e32 v6, 0x7f, v0 ; GFX9-G-NEXT: v_or_b32_e32 v14, v6, v2 ; GFX9-G-NEXT: v_and_b32_e32 v6, 1, v20 -; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GFX9-G-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v7, v9, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v12, v10, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v13, v11, 0, vcc +; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX9-G-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v12, 0, v10, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v13, 0, v11, vcc ; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] ; GFX9-G-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GFX9-G-NEXT: v_or_b32_e32 v14, v20, v14 @@ -2324,9 +2324,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, 64, v9 ; GFX9-NEXT: v_addc_co_u32_e64 v10, s[6:7], 0, 0, vcc -; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] +; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] ; GFX9-NEXT: v_ffbh_u32_e32 v11, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc ; GFX9-NEXT: v_ffbh_u32_e32 v9, v2 ; GFX9-NEXT: v_add_u32_e32 v9, 32, v9 ; GFX9-NEXT: v_min_u32_e32 v9, v9, v11 @@ -2334,13 +2334,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT: v_add_u32_e32 v11, 32, v11 ; GFX9-NEXT: v_ffbh_u32_e32 v12, v1 ; GFX9-NEXT: v_min_u32_e32 v11, v11, v12 -; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc ; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, 64, v11 ; GFX9-NEXT: v_addc_co_u32_e64 v12, s[6:7], 0, 0, vcc -; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] ; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v13, v12, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v13, 
0, v12, vcc ; GFX9-NEXT: v_sub_co_u32_e32 v12, vcc, v8, v9 ; GFX9-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v13, vcc ; GFX9-NEXT: v_mov_b32_e32 v8, 0 @@ -3414,11 +3414,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-NEXT: v_xor_b32_e32 v8, 0x7f, v12 ; GFX9-G-NEXT: v_or_b32_e32 v16, v8, v14 ; GFX9-G-NEXT: v_and_b32_e32 v8, 1, v18 -; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; GFX9-G-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v11, v1, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v8, v2, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v9, v3, 0, vcc +; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; GFX9-G-NEXT: v_cndmask_b32_e32 v10, 0, v0, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v11, 0, v1, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v8, 0, v2, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v9, 0, v3, vcc ; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] ; GFX9-G-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; GFX9-G-NEXT: v_or_b32_e32 v16, v18, v16 diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll index 77b78f1f8a333..f08a03a890e3f 100644 --- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll @@ -42,9 +42,9 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_addc_u32_e64 v3, s[8:9], 0, 0, s[8:9] ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, 0, v11, vcc ; SDAG-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[6:7] -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17] -; SDAG-NEXT: v_cndmask_b32_e64 v9, v3, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v10, v2, v1, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] +; SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v3, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v10, v1, v2, vcc ; SDAG-NEXT: v_ffbh_u32_e32 v3, v29 ; SDAG-NEXT: v_ffbh_u32_e32 v19, v28 ; SDAG-NEXT: v_cndmask_b32_e64 v1, v11, v8, s[6:7] @@ -59,9 +59,9 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_min_u32_e32 v2, v11, v19 ; SDAG-NEXT: v_add_i32_e64 v3, s[6:7], 64, v8 ; SDAG-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7] -; SDAG-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[0:1] -; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[6:7] -; SDAG-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[6:7] +; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[0:1] +; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7] ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5] ; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 ; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v8, v9, vcc @@ -241,9 +241,9 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_cndmask_b32_e64 v0, v14, v10, s[4:5] ; SDAG-NEXT: v_ffbh_u32_e32 v10, v29 ; SDAG-NEXT: v_ffbh_u32_e32 v11, v28 -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] -; SDAG-NEXT: v_cndmask_b32_e64 v12, v5, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v13, v4, v1, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] +; SDAG-NEXT: v_cndmask_b32_e32 v12, 0, v5, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v13, v1, v4, vcc ; SDAG-NEXT: v_cndmask_b32_e64 v1, v15, v9, s[4:5] ; SDAG-NEXT: v_or_b32_e32 v4, v29, v0 ; SDAG-NEXT: v_ffbh_u32_e32 v9, v0 @@ -257,9 +257,9 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_add_i32_e64 v5, s[4:5], 64, v10 ; SDAG-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5] ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; SDAG-NEXT: v_cndmask_b32_e64 v9, v9, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 
0, v[0:1] +; SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc ; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v4, v13 ; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v9, v12, vcc ; SDAG-NEXT: v_xor_b32_e32 v9, 0x7f, v4 @@ -495,13 +495,13 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v8, v9, v8 ; GISEL-NEXT: v_and_b32_e32 v9, 1, v9 ; GISEL-NEXT: v_and_b32_e32 v8, 1, v8 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v22, v18, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v22, 0, v18, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, v20, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v9, v21, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v20, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v21, vcc ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GISEL-NEXT: v_cndmask_b32_e64 v23, v19, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v23, 0, v19, vcc ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB0_6 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 @@ -685,12 +685,12 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v11, v14, v15 ; GISEL-NEXT: v_and_b32_e32 v14, 1, v11 ; GISEL-NEXT: v_or_b32_e32 v10, v11, v10 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, v6, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14 +; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v6, vcc ; GISEL-NEXT: v_and_b32_e32 v16, 1, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v15, v7, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v10, v12, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v11, v13, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v15, 0, v7, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] @@ -863,12 +863,12 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, vcc ; SDAG-NEXT: v_add_i32_e32 v19, vcc, 64, v19 ; SDAG-NEXT: v_addc_u32_e64 v21, s[4:5], 0, 0, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] -; SDAG-NEXT: v_cndmask_b32_e64 v20, v20, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] +; SDAG-NEXT: v_cndmask_b32_e32 v20, 0, v20, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v16, v16, v17, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] +; SDAG-NEXT: v_cndmask_b32_e32 v17, 0, v21, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc ; SDAG-NEXT: v_sub_i32_e32 v22, vcc, v16, v18 ; SDAG-NEXT: v_subb_u32_e32 v23, vcc, v20, v17, vcc ; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v22 @@ -1038,12 +1038,12 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc ; SDAG-NEXT: v_add_i32_e32 v3, vcc, 64, v3 ; SDAG-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] -; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] -; SDAG-NEXT: v_cndmask_b32_e64 v1, v9, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] +; 
SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] +; SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v9, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc ; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v8, v1, vcc ; SDAG-NEXT: v_xor_b32_e32 v2, 0x7f, v0 @@ -1251,13 +1251,13 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v2, v3, v2 ; GISEL-NEXT: v_and_b32_e32 v3, 1, v3 ; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v18, 0, v0, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, v16, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v17, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GISEL-NEXT: v_cndmask_b32_e64 v19, v1, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v19, 0, v1, vcc ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB1_6 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 @@ -1423,12 +1423,12 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v9, v20, v10 ; GISEL-NEXT: v_and_b32_e32 v10, 1, v9 ; GISEL-NEXT: v_or_b32_e32 v8, v9, v8 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, v4, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc ; GISEL-NEXT: v_and_b32_e32 v20, 1, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v11, v5, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, v6, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v9, v7, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v7, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] @@ -1595,9 +1595,9 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[8:9] ; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v11, vcc ; SDAG-NEXT: v_cndmask_b32_e64 v2, v10, v9, s[4:5] -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v10, v8, v3, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] +; SDAG-NEXT: v_cndmask_b32_e32 v18, 0, v18, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v10, v3, v8, vcc ; SDAG-NEXT: v_ffbh_u32_e32 v9, v31 ; SDAG-NEXT: v_ffbh_u32_e32 v21, v30 ; SDAG-NEXT: v_cndmask_b32_e64 v3, v11, v20, s[4:5] @@ -1612,9 +1612,9 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_min_u32_e32 v8, v20, v21 ; SDAG-NEXT: v_add_i32_e64 v9, s[4:5], 64, v11 ; SDAG-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5] -; SDAG-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[2:3] -; SDAG-NEXT: v_cndmask_b32_e64 v11, v11, 0, s[4:5] -; SDAG-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[4:5] +; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[2:3] +; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v11, s[4:5] +; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[4:5] ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7] ; SDAG-NEXT: v_sub_i32_e32 v10, vcc, v8, v10 ; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v11, v18, vcc @@ -1792,9 +1792,9 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> 
%rhs) { ; SDAG-NEXT: v_cndmask_b32_e64 v6, v14, v11, s[4:5] ; SDAG-NEXT: v_ffbh_u32_e32 v11, v37 ; SDAG-NEXT: v_ffbh_u32_e32 v14, v36 -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; SDAG-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v19, v10, v7, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; SDAG-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v19, v7, v10, vcc ; SDAG-NEXT: v_cndmask_b32_e64 v7, v15, v13, s[4:5] ; SDAG-NEXT: v_or_b32_e32 v10, v37, v6 ; SDAG-NEXT: v_ffbh_u32_e32 v13, v6 @@ -1808,9 +1808,9 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_add_i32_e64 v11, s[4:5], 64, v14 ; SDAG-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5] ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] -; SDAG-NEXT: v_cndmask_b32_e64 v13, v13, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] +; SDAG-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc ; SDAG-NEXT: v_sub_i32_e32 v10, vcc, v10, v19 ; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v13, v12, vcc ; SDAG-NEXT: v_xor_b32_e32 v14, 0x7f, v10 @@ -2093,13 +2093,13 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v18, v19, v18 ; GISEL-NEXT: v_and_b32_e32 v19, 1, v19 ; GISEL-NEXT: v_and_b32_e32 v18, 1, v18 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v31, v16, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19 +; GISEL-NEXT: v_cndmask_b32_e32 v31, 0, v16, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, v8, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, v9, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v18, 0, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v19, 0, v9, vcc ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GISEL-NEXT: v_cndmask_b32_e64 v32, v17, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v32, 0, v17, vcc ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB2_6 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 @@ -2283,12 +2283,12 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v3, v20, v21 ; GISEL-NEXT: v_and_b32_e32 v20, 1, v3 ; GISEL-NEXT: v_or_b32_e32 v2, v3, v2 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, v12, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v20 +; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v12, vcc ; GISEL-NEXT: v_and_b32_e32 v22, 1, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v21, v13, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v13, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] @@ -2493,12 +2493,12 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, vcc ; SDAG-NEXT: v_add_i32_e32 v19, vcc, 64, v19 ; SDAG-NEXT: v_addc_u32_e64 v21, s[4:5], 0, 0, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] -; SDAG-NEXT: v_cndmask_b32_e64 v20, v20, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc +; SDAG-NEXT: 
v_cmp_eq_u64_e32 vcc, 0, v[10:11] +; SDAG-NEXT: v_cndmask_b32_e32 v20, 0, v20, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v16, v16, v17, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] +; SDAG-NEXT: v_cndmask_b32_e32 v17, 0, v21, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc ; SDAG-NEXT: v_sub_i32_e32 v18, vcc, v16, v18 ; SDAG-NEXT: v_subb_u32_e32 v19, vcc, v20, v17, vcc ; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v18 @@ -2668,12 +2668,12 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; SDAG-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, vcc ; SDAG-NEXT: v_add_i32_e32 v19, vcc, 64, v19 ; SDAG-NEXT: v_addc_u32_e64 v21, s[4:5], 0, 0, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] -; SDAG-NEXT: v_cndmask_b32_e64 v20, v20, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] -; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc -; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] +; SDAG-NEXT: v_cndmask_b32_e32 v20, 0, v20, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v16, v16, v17, vcc +; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] +; SDAG-NEXT: v_cndmask_b32_e32 v17, 0, v21, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc ; SDAG-NEXT: v_sub_i32_e32 v16, vcc, v16, v18 ; SDAG-NEXT: v_subb_u32_e32 v17, vcc, v20, v17, vcc ; SDAG-NEXT: v_xor_b32_e32 v18, 0x7f, v16 @@ -2920,13 +2920,13 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v20, v21, v20 ; GISEL-NEXT: v_and_b32_e32 v21, 1, v21 ; GISEL-NEXT: v_and_b32_e32 v20, 1, v20 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v32, v0, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v21 +; GISEL-NEXT: v_cndmask_b32_e32 v32, 0, v0, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, v2, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v21, v3, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v3, vcc ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GISEL-NEXT: v_cndmask_b32_e64 v33, v1, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v33, 0, v1, vcc ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB3_6 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 @@ -3092,12 +3092,12 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v19, v26, v24 ; GISEL-NEXT: v_and_b32_e32 v24, 1, v19 ; GISEL-NEXT: v_or_b32_e32 v18, v19, v18 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v24 -; GISEL-NEXT: v_cndmask_b32_e64 v24, v4, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 +; GISEL-NEXT: v_cndmask_b32_e32 v24, 0, v4, vcc ; GISEL-NEXT: v_and_b32_e32 v26, 1, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v25, v5, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v18, v6, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, v7, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v25, 0, v5, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v18, 0, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v19, 0, v7, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll index 41082821bafe3..89aa3945aca07 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll @@ -210,14 +210,14 @@ define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 % ; GCN-NEXT: s_or_b64 exec, 
exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, 0xffff8000 -; GCN-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc -; GCN-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[6:7] -; GCN-NEXT: v_cndmask_b32_e64 v2, v1, -1, vcc -; GCN-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[8:9] -; GCN-NEXT: v_cndmask_b32_e64 v4, v1, -1, vcc -; GCN-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[10:11] -; GCN-NEXT: v_cndmask_b32_e64 v6, v1, -1, vcc +; GCN-NEXT: v_cmp_le_i64_e32 vcc, 0, v[4:5] +; GCN-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc +; GCN-NEXT: v_cmp_le_i64_e32 vcc, 0, v[6:7] +; GCN-NEXT: v_cndmask_b32_e32 v2, -1, v1, vcc +; GCN-NEXT: v_cmp_le_i64_e32 vcc, 0, v[8:9] +; GCN-NEXT: v_cndmask_b32_e32 v4, -1, v1, vcc +; GCN-NEXT: v_cmp_le_i64_e32 vcc, 0, v[10:11] +; GCN-NEXT: v_cndmask_b32_e32 v6, -1, v1, vcc ; GCN-NEXT: v_mov_b32_e32 v1, -1 ; GCN-NEXT: v_mov_b32_e32 v3, -1 ; GCN-NEXT: v_mov_b32_e32 v5, -1 @@ -300,23 +300,23 @@ define <8 x i64> @extract_8xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 % ; GCN-NEXT: .LBB3_4: ; %exit ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v1, 0xffff8000 -; GCN-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[6:7] -; GCN-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[8:9] -; GCN-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[10:11] -; GCN-NEXT: v_cmp_gt_i64_e64 s[8:9], 0, v[12:13] -; GCN-NEXT: v_cmp_gt_i64_e64 s[10:11], 0, v[14:15] -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_cmp_gt_i64_e64 s[12:13], 0, v[16:17] -; GCN-NEXT: v_cmp_gt_i64_e64 s[14:15], 0, v[18:19] -; GCN-NEXT: v_cmp_gt_i64_e64 s[16:17], 0, v[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v0, v1, -1, s[16:17] -; GCN-NEXT: v_cndmask_b32_e64 v2, v1, -1, vcc -; GCN-NEXT: v_cndmask_b32_e64 v4, v1, -1, s[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v6, v1, -1, s[6:7] -; GCN-NEXT: v_cndmask_b32_e64 v8, v1, -1, s[8:9] -; GCN-NEXT: v_cndmask_b32_e64 v10, v1, -1, s[10:11] -; GCN-NEXT: v_cndmask_b32_e64 v12, v1, -1, s[12:13] -; GCN-NEXT: v_cndmask_b32_e64 v14, v1, -1, s[14:15] +; GCN-NEXT: v_cmp_le_i64_e32 vcc, 0, v[6:7] +; GCN-NEXT: v_cmp_le_i64_e64 s[4:5], 0, v[8:9] +; GCN-NEXT: v_cmp_le_i64_e64 s[6:7], 0, v[10:11] +; GCN-NEXT: v_cmp_le_i64_e64 s[8:9], 0, v[12:13] +; GCN-NEXT: v_cmp_le_i64_e64 s[10:11], 0, v[14:15] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_cmp_le_i64_e64 s[12:13], 0, v[16:17] +; GCN-NEXT: v_cmp_le_i64_e64 s[14:15], 0, v[18:19] +; GCN-NEXT: v_cmp_le_i64_e64 s[16:17], 0, v[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v0, -1, v1, s[16:17] +; GCN-NEXT: v_cndmask_b32_e32 v2, -1, v1, vcc +; GCN-NEXT: v_cndmask_b32_e64 v4, -1, v1, s[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v6, -1, v1, s[6:7] +; GCN-NEXT: v_cndmask_b32_e64 v8, -1, v1, s[8:9] +; GCN-NEXT: v_cndmask_b32_e64 v10, -1, v1, s[10:11] +; GCN-NEXT: v_cndmask_b32_e64 v12, -1, v1, s[12:13] +; GCN-NEXT: v_cndmask_b32_e64 v14, -1, v1, s[14:15] ; GCN-NEXT: v_mov_b32_e32 v1, -1 ; GCN-NEXT: v_mov_b32_e32 v3, -1 ; GCN-NEXT: v_mov_b32_e32 v5, -1 @@ -389,10 +389,10 @@ define <2 x double> @extract_2xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, 0xbff00000 -; GCN-NEXT: v_cmp_lt_f64_e32 vcc, -1.0, v[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v1, v0, -2.0, vcc -; GCN-NEXT: v_cmp_lt_f64_e32 vcc, -1.0, v[6:7] -; GCN-NEXT: v_cndmask_b32_e64 v3, v0, -2.0, vcc +; GCN-NEXT: v_cmp_nlt_f64_e32 vcc, -1.0, v[4:5] +; GCN-NEXT: v_cndmask_b32_e32 v1, -2.0, v0, vcc +; GCN-NEXT: v_cmp_nlt_f64_e32 vcc, -1.0, v[6:7] +; GCN-NEXT: v_cndmask_b32_e32 v3, -2.0, v0, vcc ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; 
GCN-NEXT: v_mov_b32_e32 v2, 0 ; GCN-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll index ffe0596a95e33..a807ead48ecbd 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll @@ -12317,9 +12317,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old ; GFX12-NEXT: s_cselect_b32 s0, s0, -1 ; GFX12-NEXT: scratch_load_b64 v[0:1], off, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX12-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1] +; GFX12-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: @@ -12468,9 +12468,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol ; GFX12-NEXT: s_cselect_b32 s0, s0, -1 ; GFX12-NEXT: scratch_load_b64 v[0:1], off, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX12-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1] +; GFX12-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: @@ -12614,9 +12614,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6 ; GFX12-NEXT: s_cselect_b32 s0, s0, -1 ; GFX12-NEXT: scratch_load_b64 v[0:1], off, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[6:7], v[0:1] -; GFX12-NEXT: v_cndmask_b32_e64 v3, v1, s5, vcc_lo -; GFX12-NEXT: v_cndmask_b32_e64 v2, v0, s4, vcc_lo +; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[6:7], v[0:1] +; GFX12-NEXT: v_cndmask_b32_e32 v3, s5, v1, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo ; GFX12-NEXT: scratch_store_b64 off, v[2:3], s0 ; GFX12-NEXT: .LBB92_4: ; %atomicrmw.end ; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 @@ -12775,9 +12775,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i ; GFX12-NEXT: s_cselect_b32 s0, s0, -1 ; GFX12-NEXT: scratch_load_b64 v[0:1], off, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[6:7], v[0:1] -; GFX12-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX12-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[6:7], v[0:1] +; GFX12-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: @@ -12935,9 +12935,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o ; GFX12-NEXT: s_cselect_b32 s2, s2, -1 ; GFX12-NEXT: scratch_load_b64 v[0:1], off, s2 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[0:1], v[0:1] -; GFX12-NEXT: v_cndmask_b32_e64 v3, v1, s13, vcc_lo -; GFX12-NEXT: v_cndmask_b32_e64 v2, v0, s12, vcc_lo +; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[0:1], v[0:1] +; GFX12-NEXT: v_cndmask_b32_e32 v3, s13, v1, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v2, s12, v0, vcc_lo ; GFX12-NEXT: scratch_store_b64 off, v[2:3], s2 ; GFX12-NEXT: .LBB94_4: ; %atomicrmw.end ; GFX12-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 
v3, s11 @@ -13087,9 +13087,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, i64 %in, i64 %old) { ; GFX12-NEXT: s_cselect_b32 s0, s0, -1 ; GFX12-NEXT: scratch_load_b64 v[0:1], off, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX12-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1] +; GFX12-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: @@ -13227,9 +13227,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr %out, ptr %out2, i64 %in, ; GFX12-NEXT: s_cselect_b32 s0, s0, -1 ; GFX12-NEXT: scratch_load_b64 v[0:1], off, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[6:7], v[0:1] -; GFX12-NEXT: v_cndmask_b32_e64 v3, v1, s5, vcc_lo -; GFX12-NEXT: v_cndmask_b32_e64 v2, v0, s4, vcc_lo +; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[6:7], v[0:1] +; GFX12-NEXT: v_cndmask_b32_e32 v3, s5, v1, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo ; GFX12-NEXT: scratch_store_b64 off, v[2:3], s0 ; GFX12-NEXT: .LBB96_4: ; %atomicrmw.end ; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 @@ -13382,9 +13382,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind ; GFX12-NEXT: s_cselect_b32 s0, s0, -1 ; GFX12-NEXT: scratch_load_b64 v[0:1], off, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[6:7], v[0:1] -; GFX12-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo -; GFX12-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo +; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[6:7], v[0:1] +; GFX12-NEXT: v_cndmask_b32_e32 v1, s3, v1, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v0, s2, v0, vcc_lo ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: @@ -13536,9 +13536,9 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6 ; GFX12-NEXT: s_cselect_b32 s2, s2, -1 ; GFX12-NEXT: scratch_load_b64 v[0:1], off, s2 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[0:1], v[0:1] -; GFX12-NEXT: v_cndmask_b32_e64 v3, v1, s13, vcc_lo -; GFX12-NEXT: v_cndmask_b32_e64 v2, v0, s12, vcc_lo +; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[0:1], v[0:1] +; GFX12-NEXT: v_cndmask_b32_e32 v3, s13, v1, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v2, s12, v0, vcc_lo ; GFX12-NEXT: scratch_store_b64 off, v[2:3], s2 ; GFX12-NEXT: .LBB98_4: ; %atomicrmw.end ; GFX12-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s11 diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll index 53d940e1e6c1a..450c9d3ae64c4 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -3167,15 +3167,15 @@ define double @v_fmaximum3_f64(double %a, double %b, double %c) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: 
s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.maximum.f64(double %a, double %b) %max1 = call double @llvm.maximum.f64(double %max0, double %c) @@ -3200,15 +3200,15 @@ define double @v_fmaximum3_f64_commute(double %a, double %b, double %c) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[4:5], v[0:1] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.maximum.f64(double %a, double %b) %max1 = call double @llvm.maximum.f64(double %c, double %max0) @@ -3232,15 +3232,15 @@ define amdgpu_ps <2 x i32> @s_fmaximum3_f64(double inreg %a, double inreg %b, do ; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX9-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1] ; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, s[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[4:5], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, s[4:5], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_readfirstlane_b32 s1, v0 ; GFX9-NEXT: v_readfirstlane_b32 s0, v1 ; GFX9-NEXT: ; return to shader part epilog @@ -3279,10 +3279,10 @@ define double @v_fmaximum3_f64_fabs0(double %a, double %b, double %c) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %a.fabs = call double @llvm.fabs.f64(double %a) %max0 = call double @llvm.maximum.f64(double %a.fabs, double %b) @@ -3313,10 +3313,10 @@ define double @v_fmaximum3_f64_fabs1(double %a, double %b, double %c) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; 
GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %b.fabs = call double @llvm.fabs.f64(double %b) %max0 = call double @llvm.maximum.f64(double %a, double %b.fabs) @@ -3342,10 +3342,10 @@ define double @v_fmaximum3_f64_fabs2(double %a, double %b, double %c) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], |v[4:5]| ; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]| ; GFX9-NEXT: s_nop 1 @@ -3492,10 +3492,10 @@ define double @v_fmaximum3_f64_fneg0(double %a, double %b, double %c) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg double %a %max0 = call double @llvm.maximum.f64(double %a.fneg, double %b) @@ -3526,10 +3526,10 @@ define double @v_fmaximum3_f64_fneg1(double %a, double %b, double %c) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %b.fneg = fneg double %b %max0 = call double @llvm.maximum.f64(double %a, double %b.fneg) @@ -3555,10 +3555,10 @@ define double @v_fmaximum3_f64_fneg2(double %a, double %b, double %c) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], -v[4:5] ; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5] ; GFX9-NEXT: s_nop 1 @@ -3591,15 +3591,15 @@ define double @v_fmaximum3_f64_const0(double %b, double %c) { ; GFX9-NEXT: s_mov_b32 s1, 0x40200000 ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX9-NEXT: 
v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.maximum.f64(double 8.0, double %b) %max1 = call double @llvm.maximum.f64(double %max0, double %c) @@ -3624,16 +3624,16 @@ define double @v_fmaximum3_f64__const2(double %a, double %b) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_mov_b32 s1, 0x40200000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.maximum.f64(double %a, double %b) %max1 = call double @llvm.maximum.f64(double %max0, double 8.0) @@ -3658,15 +3658,15 @@ define double @v_fmaximum3_f64_inlineimm0(double %b, double %c) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], 4.0 ; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.maximum.f64(double 4.0, double %b) %max1 = call double @llvm.maximum.f64(double %max0, double %c) @@ -3691,15 +3691,15 @@ define double @v_fmaximum3_f64__inlineimm(double %a, double %b) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], 4.0 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call 
double @llvm.maximum.f64(double %a, double %b) %max1 = call double @llvm.maximum.f64(double %max0, double 4.0) @@ -3726,16 +3726,16 @@ define double @v_fmaximum3_f64_const1_const2(double %a) { ; GFX9-NEXT: s_mov_b32 s1, 0x40200000 ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_mov_b32 s1, 0x40300000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], s[0:1] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.maximum.f64(double %a, double 8.0) %max1 = call double @llvm.maximum.f64(double %max0, double 16.0) @@ -4003,15 +4003,15 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.maximum.f64(double %a, double %b) %max1 = call double @llvm.maximum.f64(double %max0, double %c) diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll index cbb07672be8ec..22b6225eba76d 100644 --- a/llvm/test/CodeGen/AMDGPU/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll @@ -1032,10 +1032,10 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) ; VI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v3 -; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, 2.0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 2.0, vcc -; VI-GISEL-NEXT: v_cmp_ngt_f32_e32 vcc, 4.0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 4.0, vcc +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 2.0, v2, vcc +; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 4.0, v2, vcc ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 ; VI-GISEL-NEXT: s_endpgm ; @@ -1059,10 +1059,10 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: v_add_f32_e32 v1, 1.0, v1 -; GFX9-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, 2.0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 2.0, vcc -; GFX9-GISEL-NEXT: 
v_cmp_ngt_f32_e32 vcc, 4.0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc +; GFX9-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc +; GFX9-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, 4.0, v1 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-GISEL-NEXT: s_endpgm ; @@ -1090,11 +1090,11 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX11-GISEL-NEXT: v_add_f32_e32 v1, 1.0, v1 -; GFX11-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 2.0, v1 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 2.0, vcc_lo +; GFX11-GISEL-NEXT: v_cmp_lt_f32_e32 vcc_lo, 2.0, v1 +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc_lo ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 4.0, v1 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo +; GFX11-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 4.0, v1 +; GFX11-GISEL-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo ; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-GISEL-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll index d1d0c0dcdb7e0..91d24121dbdeb 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll @@ -3167,15 +3167,15 @@ define double @v_fminimum3_f64(double %a, double %b, double %c) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.minimum.f64(double %a, double %b) %max1 = call double @llvm.minimum.f64(double %max0, double %c) @@ -3200,15 +3200,15 @@ define double @v_fminimum3_f64_commute(double %a, double %b, double %c) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[4:5], v[0:1] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.minimum.f64(double %a, double %b) %max1 = call double 
@llvm.minimum.f64(double %c, double %max0) @@ -3232,15 +3232,15 @@ define amdgpu_ps <2 x i32> @s_fminimum3_f64(double inreg %a, double inreg %b, do ; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX9-NEXT: v_min_f64 v[2:3], s[0:1], v[0:1] ; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, s[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, s[4:5], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, s[4:5], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_readfirstlane_b32 s1, v0 ; GFX9-NEXT: v_readfirstlane_b32 s0, v1 ; GFX9-NEXT: ; return to shader part epilog @@ -3279,10 +3279,10 @@ define double @v_fminimum3_f64_fabs0(double %a, double %b, double %c) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %a.fabs = call double @llvm.fabs.f64(double %a) %max0 = call double @llvm.minimum.f64(double %a.fabs, double %b) @@ -3313,10 +3313,10 @@ define double @v_fminimum3_f64_fabs1(double %a, double %b, double %c) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %b.fabs = call double @llvm.fabs.f64(double %b) %max0 = call double @llvm.minimum.f64(double %a, double %b.fabs) @@ -3342,10 +3342,10 @@ define double @v_fminimum3_f64_fabs2(double %a, double %b, double %c) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], |v[4:5]| ; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], |v[4:5]| ; GFX9-NEXT: s_nop 1 @@ -3492,10 +3492,10 @@ define double @v_fminimum3_f64_fneg0(double %a, double %b, double %c) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: 
v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg double %a %max0 = call double @llvm.minimum.f64(double %a.fneg, double %b) @@ -3526,10 +3526,10 @@ define double @v_fminimum3_f64_fneg1(double %a, double %b, double %c) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %b.fneg = fneg double %b %max0 = call double @llvm.minimum.f64(double %a, double %b.fneg) @@ -3555,10 +3555,10 @@ define double @v_fminimum3_f64_fneg2(double %a, double %b, double %c) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], -v[4:5] ; GFX9-NEXT: v_cmp_u_f64_e64 vcc, v[0:1], -v[4:5] ; GFX9-NEXT: s_nop 1 @@ -3591,15 +3591,15 @@ define double @v_fminimum3_f64_const0(double %b, double %c) { ; GFX9-NEXT: s_mov_b32 s1, 0x40200000 ; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.minimum.f64(double 8.0, double %b) %max1 = call double @llvm.minimum.f64(double %max0, double %c) @@ -3624,16 +3624,16 @@ define double @v_fminimum3_f64__const2(double %a, double %b) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_mov_b32 s1, 0x40200000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 
v1, v6, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.minimum.f64(double %a, double %b) %max1 = call double @llvm.minimum.f64(double %max0, double 8.0) @@ -3658,15 +3658,15 @@ define double @v_fminimum3_f64_inlineimm0(double %b, double %c) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], 4.0 ; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.minimum.f64(double 4.0, double %b) %max1 = call double @llvm.minimum.f64(double %max0, double %c) @@ -3691,15 +3691,15 @@ define double @v_fminimum3_f64__inlineimm(double %a, double %b) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v6, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], 4.0 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v6, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.minimum.f64(double %a, double %b) %max1 = call double @llvm.minimum.f64(double %max0, double 4.0) @@ -3726,16 +3726,16 @@ define double @v_fminimum3_f64_const1_const2(double %a) { ; GFX9-NEXT: s_mov_b32 s1, 0x40200000 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1] ; GFX9-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_mov_b32 s1, 0x40300000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], s[0:1] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.minimum.f64(double %a, double 8.0) %max1 = call double @llvm.minimum.f64(double %max0, double 16.0) @@ -4003,15 +4003,15 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_min_f64 v[6:7], 
v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.minimum.f64(double %a, double %b) %max1 = call double @llvm.minimum.f64(double %max0, double %c) diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll index 12daf10594df5..fb623742804f8 100644 --- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll +++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll @@ -4547,8 +4547,8 @@ define float @v_contract_mul_add_f32_select_1_64(i32 %arg, float %x, float %y) { ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, 1.0, vcc +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc ; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -4665,8 +4665,8 @@ define float @v_contract_mul_add_f32_select_n1_n64(i32 %arg, float %x, float %y) ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2800000 -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, -1.0, vcc +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, -1.0, v3, vcc ; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -4871,8 +4871,8 @@ define float @v_contract_mul_add_f32_select_4_128(i32 %arg, float %x, float %y) ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x43000000 -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, 4.0, vcc +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, 4.0, v3, vcc ; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5519,8 +5519,8 @@ define double @v_contract_mul_add_f64_select_2_4(i32 %arg, double %x, double %y) ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x40100000 -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v6, v5, 2.0, vcc +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v6, 2.0, v5, vcc ; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll index f199db3ca12ca..0084125fa408b 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll @@ -607,8 +607,8 @@ define amdgpu_ps half @fneg_fadd_0_f16(half 
inreg %tmp2, half inreg %tmp6, <4 x ; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 ; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc ; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 -; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; SI-SAFE-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SAFE-NEXT: ; return to shader part epilog ; ; SI-NSZ-LABEL: fneg_fadd_0_f16: @@ -635,8 +635,8 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x ; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1 ; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000 -; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-NSZ-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-NSZ-NEXT: ; return to shader part epilog ; ; VI-SAFE-LABEL: fneg_fadd_0_f16: @@ -744,8 +744,8 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, < ; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, 0, v0 -; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 -; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 +; SI-SAFE-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SAFE-NEXT: ; return to shader part epilog ; ; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16: @@ -759,8 +759,8 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, < ; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0 ; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1 ; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; SI-NSZ-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; SI-NSZ-NEXT: ; return to shader part epilog ; ; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16: diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index 14c18df6d8e41..7ec56a8ad38bd 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -254,8 +254,8 @@ define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i ; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]], 0, [[A]] ; GCN-NSZ-DAG: v_cmp_ngt_f32_e32 {{.*}}, s{{[0-9]+}}, [[D]] ; GCN-NSZ-DAG: v_cndmask_b32_e64 [[E:v[0-9]+]], -[[D]], v{{[0-9]+}}, -; GCN-NSZ-DAG: v_cmp_nlt_f32_e32 {{.*}}, 0 -; GCN-NSZ-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, [[C]], 0, +; GCN-NSZ-DAG: v_cmp_lt_f32_e32 {{.*}}, 0 +; GCN-NSZ-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 { .entry: %tmp7 = fdiv afn float 1.000000e+00, %tmp6 diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll index 46da9d33639b6..84573b76514e5 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll @@ -197,8 +197,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4 ; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc ; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 -; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; SI-SAFE-NEXT: 
v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SAFE-NEXT: ; return to shader part epilog ; ; SI-NSZ-LABEL: fneg_fadd_0_f32: @@ -221,8 +221,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4 ; SI-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc ; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000 -; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-NSZ-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-NSZ-NEXT: ; return to shader part epilog ; ; VI-SAFE-LABEL: fneg_fadd_0_f32: @@ -245,8 +245,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4 ; VI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0 ; VI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc -; VI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-SAFE-NEXT: ; return to shader part epilog ; ; VI-NSZ-LABEL: fneg_fadd_0_f32: @@ -269,8 +269,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4 ; VI-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0 ; VI-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc -; VI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-NSZ-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-NSZ-NEXT: ; return to shader part epilog .entry: %tmp7 = fdiv float 1.000000e+00, %tmp6 @@ -294,8 +294,8 @@ define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6 ; SI-SAFE-NEXT: v_min_legacy_f32_e64 v0, 0, s0 ; SI-SAFE-NEXT: s_brev_b32 s0, 1 ; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 -; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 -; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s0, v0 +; SI-SAFE-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-SAFE-NEXT: ; return to shader part epilog ; ; GCN-NSZ-LABEL: fneg_fadd_0_nsz_f32: @@ -306,8 +306,8 @@ define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6 ; GCN-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; GCN-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc ; GCN-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000 -; GCN-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; GCN-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GCN-NSZ-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; GCN-NSZ-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GCN-NSZ-NEXT: ; return to shader part epilog ; ; VI-SAFE-LABEL: fneg_fadd_0_nsz_f32: @@ -319,8 +319,8 @@ define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6 ; VI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc ; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 -; VI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0 +; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-SAFE-NEXT: ; return to shader part epilog .entry: %tmp7 = fdiv afn float 1.000000e+00, %tmp6 @@ -3998,9 +3998,9 @@ define float @v_fneg_select_infloop_regression_f32(float %arg, i1 %arg1) { ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 0, vcc +; 
GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float 0.0, float %arg %i2 = fneg float %i @@ -4028,9 +4028,9 @@ define float @v_fneg_select_infloop_regression_f32_commute1(float %arg, i1 %arg1 ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -v0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float 0.0, float %arg %i2 = fneg float %i @@ -4059,9 +4059,9 @@ define float @v_fneg_select_infloop_regression_inline_imm_f32(float %arg, i1 %ar ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 2.0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 2.0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 2.0, -v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float 2.0, float %arg %i2 = fneg float %i @@ -4089,9 +4089,9 @@ define float @v_fneg_select_infloop_regression_inline_imm_f32_commute1(float %ar ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 2.0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, 2.0, -v0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 2.0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float 2.0, float %arg %i2 = fneg float %i @@ -4120,9 +4120,9 @@ define float @v_fneg_select_infloop_regression_neg_inline_imm_f32(float %arg, i1 ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -2.0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, -2.0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, -2.0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, -2.0, -v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float -2.0, float %arg %i2 = fneg float %i @@ -4150,9 +4150,9 @@ define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute1(float ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -2.0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -2.0, -v0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, -2.0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, -2.0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float -2.0, float %arg %i2 = fneg float %i @@ -4228,10 +4228,10 @@ define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) { ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v2, 1, v2 ; GCN-NEXT: v_bfrev_b32_e32 v3, 1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GCN-NEXT: 
v_cmp_ne_u32_e32 vcc, 1, v2 +; GCN-NEXT: v_cndmask_b32_e64 v1, v3, -v1, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, double 0.0, double %arg %i2 = fneg double %i @@ -4284,20 +4284,20 @@ define half @v_fneg_select_infloop_regression_f16(half %arg, i1 %arg1) { ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-NEXT: v_and_b32_e32 v1, 1, v1 -; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; SI-NEXT: v_cndmask_b32_e64 v0, -v0, 0, vcc +; SI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-NEXT: v_cndmask_b32_e64 v0, 0, -v0, vcc ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_fneg_select_infloop_regression_f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_and_b32_e32 v1, 1, v1 -; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, half 0.0, half %arg %i2 = fneg half %i @@ -4351,10 +4351,10 @@ define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %a ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-NEXT: v_and_b32_e32 v1, 1, v2 -; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; SI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, vcc +; SI-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc ; SI-NEXT: v_cvt_f32_f16_e32 v0, v1 ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 @@ -4364,10 +4364,10 @@ define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %a ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_and_b32_e32 v1, 1, v1 -; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg %i2 = fneg <2 x half> %i @@ -4426,11 +4426,11 @@ define <2 x float> @v_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v2, 1, v2 ; GCN-NEXT: v_bfrev_b32_e32 v3, 1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, v3, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2 +; GCN-NEXT: v_cndmask_b32_e64 v1, v3, -v1, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, v3, -v0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg %i2 = fneg <2 x float> %i @@ -4478,9 +4478,9 @@ define float @v_fabs_select_infloop_regression_f32(float %arg, i1 %arg1) { ; GCN: ; %bb.0: ; 
GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, |v0|, 0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, |v0|, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float 0.0, float %arg %i2 = call float @llvm.fabs.f32(float %i) @@ -4529,9 +4529,9 @@ define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) { ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -|v0|, 0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -|v0|, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float 0.0, float %arg %i2 = call float @llvm.fabs.f32(float %i) diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll index 3465c782bd700..0ff60af86135b 100644 --- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll @@ -241,10 +241,10 @@ define i128 @fptosi_f64_to_i128(double %x) { ; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7 ; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5] ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v9, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v8, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -256,7 +256,7 @@ define i128 @fptosi_f64_to_i128(double %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr9 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6 @@ -605,10 +605,10 @@ define i128 @fptoui_f64_to_i128(double %x) { ; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7 ; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5] ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v9, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v8, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -620,7 +620,7 @@ define i128 @fptoui_f64_to_i128(double %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr9 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7] ; GISEL-NEXT: ; 
implicit-def: $vgpr6 @@ -962,10 +962,10 @@ define i128 @fptosi_f32_to_i128(float %x) { ; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7 ; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5] ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v8, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -977,7 +977,7 @@ define i128 @fptosi_f32_to_i128(float %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr8 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6 @@ -1313,10 +1313,10 @@ define i128 @fptoui_f32_to_i128(float %x) { ; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7 ; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5] ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v8, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -1328,7 +1328,7 @@ define i128 @fptoui_f32_to_i128(float %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr8 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6 @@ -1692,8 +1692,8 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) { ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v4, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, v2, 0, s[6:7] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v8, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -1705,7 +1705,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v9, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7 @@ -2039,8 +2039,8 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) { ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v4, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1] ; GISEL-NEXT: 
v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, v2, 0, s[6:7] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v8, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -2052,7 +2052,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v9, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll index 6925a98f643b9..2e037335ce37a 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -1919,31 +1919,31 @@ define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) { ; GCN-LABEL: double8_inselt_vec: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 -; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 -; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 -; GCN-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 -; GCN-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 -; GCN-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 -; GCN-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 -; GCN-NEXT: v_cndmask_b32_e64 v14, v14, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v16 +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GCN-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v16 +; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; GCN-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v16 +; GCN-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc +; GCN-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 4, v16 +; GCN-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc +; GCN-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 5, v16 +; GCN-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc +; GCN-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 6, v16 +; GCN-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc +; GCN-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 7, v16 +; GCN-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc +; GCN-NEXT: 
v_cndmask_b32_e32 v15, v17, v15, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll index c316ec71863d0..6bfeda6a1a9e5 100644 --- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll @@ -214,11 +214,11 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v11, -1, v11, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -459,11 +459,11 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, v8, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v9, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -746,11 +746,11 @@ define double @sitofp_i128_to_f64(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 ; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v16, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v15 ; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v9, -1, v9, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v12, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v0, v0, v4 ; GISEL-NEXT: v_and_b32_e32 v1, v1, v5 ; GISEL-NEXT: v_and_or_b32 v0, v9, v2, v0 @@ -1023,11 +1023,11 @@ define double @uitofp_i128_to_f64(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 ; GISEL-NEXT: v_cndmask_b32_e32 v8, v12, v16, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v15 ; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v8, -1, v8, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v12, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, v4, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v5, v3 ; GISEL-NEXT: v_and_or_b32 v0, v8, v0, v2 @@ -1305,11 +1305,11 @@ define half @sitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 
0, v14 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v11, -1, v11, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -1552,11 +1552,11 @@ define half @uitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, v8, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v9, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll index 978f223aafb94..13c3b36f4f8f1 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll @@ -69,9 +69,9 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -123,11 +123,11 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) { ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -179,11 +179,11 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) { ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; SI-GISEL-NEXT: s_mov_b32 s2, -1 @@ -424,15 +424,15 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) 
{ ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0 ; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc ; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc @@ -504,14 +504,14 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v5, v1 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -585,14 +585,14 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v5, v1 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; SI-GISEL-NEXT: s_mov_b32 s2, -1 @@ -960,9 +960,9 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v3 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000 ; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6 @@ -981,13 +981,13 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; 
VI-GISEL-NEXT: v_exp_f32_e32 v6, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v4 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc @@ -1080,21 +1080,21 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v6, v2 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 @@ -1189,21 +1189,21 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v6, v2 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 
; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc @@ -1719,8 +1719,8 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 @@ -1752,19 +1752,19 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8 ; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc ; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc @@ -1855,7 +1855,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2 ; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v2, -v1 @@ -1865,7 +1865,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 @@ -1878,8 +1878,8 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v8 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2 ; GFX900-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8 @@ -1891,13 +1891,13 @@ define amdgpu_kernel void 
@s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc ; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 @@ -1989,7 +1989,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2 @@ -2000,7 +2000,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v6 @@ -2013,8 +2013,8 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v8 ; SI-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2 ; SI-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8 @@ -2026,13 +2026,13 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc ; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 @@ -2565,9 +2565,9 @@ define float @v_exp_f32(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; 
VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -2611,9 +2611,9 @@ define float @v_exp_f32(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -2657,9 +2657,9 @@ define float @v_exp_f32(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3400,8 +3400,8 @@ define float @v_exp_f32_ninf(float %in) { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_f32_ninf: @@ -3438,8 +3438,8 @@ define float @v_exp_f32_ninf(float %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_ninf: @@ -3476,8 +3476,8 @@ define float @v_exp_f32_ninf(float %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_ninf: @@ -3770,9 +3770,9 @@ define float @v_exp_f32_daz(float %in) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3816,9 +3816,9 @@ define float 
@v_exp_f32_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3862,9 +3862,9 @@ define float @v_exp_f32_daz(float %in) #0 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3928,9 +3928,9 @@ define float @v_exp_f32_nnan(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3974,9 +3974,9 @@ define float @v_exp_f32_nnan(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4020,9 +4020,9 @@ define float @v_exp_f32_nnan(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4086,9 +4086,9 @@ define float @v_exp_f32_nnan_daz(float %in) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4132,9 +4132,9 @@ define float 
@v_exp_f32_nnan_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4178,9 +4178,9 @@ define float @v_exp_f32_nnan_daz(float %in) #0 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4244,9 +4244,9 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4290,9 +4290,9 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4336,9 +4336,9 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4397,8 +4397,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz: @@ -4435,8 +4435,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 { ; GFX900-GISEL-NEXT: 
v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_ninf_daz: @@ -4473,8 +4473,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_ninf_daz: @@ -4531,8 +4531,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic: @@ -4569,8 +4569,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic: @@ -4607,8 +4607,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_ninf_dynamic: @@ -4665,8 +4665,8 @@ define float @v_exp_f32_nnan_ninf(float %in) { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf: @@ -4703,8 +4703,8 @@ define float @v_exp_f32_nnan_ninf(float %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf: @@ -4741,8 +4741,8 @@ define float @v_exp_f32_nnan_ninf(float %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 
vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_nnan_ninf: @@ -4799,8 +4799,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz: @@ -4837,8 +4837,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz: @@ -4875,8 +4875,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_nnan_ninf_daz: @@ -4933,8 +4933,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: @@ -4971,8 +4971,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: @@ -5009,8 +5009,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_nnan_ninf_dynamic: @@ -5100,9 +5100,9 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 
0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5146,9 +5146,9 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5192,9 +5192,9 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5244,9 +5244,9 @@ define float @v_exp_f32_undef() { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5283,9 +5283,9 @@ define float @v_exp_f32_undef() { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5322,9 +5322,9 @@ define float @v_exp_f32_undef() { ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5416,9 +5416,9 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 
0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5462,11 +5462,11 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) { ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v3 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5510,11 +5510,11 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) { ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v3 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5584,9 +5584,9 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5634,9 +5634,9 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5688,9 +5688,9 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5905,9 +5905,9 @@ define float 
@v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5955,9 +5955,9 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -6009,9 +6009,9 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7026,9 +7026,9 @@ define float @v_exp_f32_contract(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7072,9 +7072,9 @@ define float @v_exp_f32_contract(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7118,9 +7118,9 @@ define float @v_exp_f32_contract(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: 
v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7184,9 +7184,9 @@ define float @v_exp_f32_contract_daz(float %in) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7230,9 +7230,9 @@ define float @v_exp_f32_contract_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7276,9 +7276,9 @@ define float @v_exp_f32_contract_daz(float %in) #0 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7337,8 +7337,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp_f32_contract_nnan_ninf: @@ -7375,8 +7375,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf: @@ -7413,8 +7413,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp_f32_contract_nnan_ninf: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll index 70c3787bac9a1..1fedfa75b94ca 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll +++ 
b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll @@ -71,9 +71,9 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -125,11 +125,11 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) { ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -181,11 +181,11 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) { ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; SI-GISEL-NEXT: s_mov_b32 s2, -1 @@ -426,15 +426,15 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0 ; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc ; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc @@ -506,14 +506,14 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, 
vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v5, v1 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -587,14 +587,14 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v5, v1 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; SI-GISEL-NEXT: s_mov_b32 s2, -1 @@ -962,9 +962,9 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v3 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000 ; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6 @@ -983,13 +983,13 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v4 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc @@ -1082,21 +1082,21 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 
; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v6, v2 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 @@ -1191,21 +1191,21 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v6, v2 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc @@ -1721,8 +1721,8 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 @@ -1754,19 +1754,19 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8 ; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 
vcc, s2, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc ; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc @@ -1857,7 +1857,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2 ; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v2, -v1 @@ -1867,7 +1867,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 @@ -1880,8 +1880,8 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v8 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2 ; GFX900-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8 @@ -1893,13 +1893,13 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2 ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc ; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 @@ -1991,7 +1991,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2 @@ -2002,7 +2002,7 @@ define 
amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v6 @@ -2015,8 +2015,8 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v8 ; SI-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2 ; SI-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8 @@ -2028,13 +2028,13 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2 ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc ; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 @@ -2567,9 +2567,9 @@ define float @v_exp10_f32(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -2613,9 +2613,9 @@ define float @v_exp10_f32(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -2659,9 +2659,9 @@ define float @v_exp10_f32(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: 
v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3420,8 +3420,8 @@ define float @v_exp10_f32_ninf(float %in) { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_ninf: @@ -3458,8 +3458,8 @@ define float @v_exp10_f32_ninf(float %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_ninf: @@ -3496,8 +3496,8 @@ define float @v_exp10_f32_ninf(float %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_ninf: @@ -3828,9 +3828,9 @@ define float @v_exp10_f32_daz(float %in) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3874,9 +3874,9 @@ define float @v_exp10_f32_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3920,9 +3920,9 @@ define float @v_exp10_f32_daz(float %in) #0 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -3986,9 +3986,9 @@ define float @v_exp10_f32_nnan(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: 
v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4032,9 +4032,9 @@ define float @v_exp10_f32_nnan(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4078,9 +4078,9 @@ define float @v_exp10_f32_nnan(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4144,9 +4144,9 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4190,9 +4190,9 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4236,9 +4236,9 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4302,9 +4302,9 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 
0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4348,9 +4348,9 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4394,9 +4394,9 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -4455,8 +4455,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_ninf_daz: @@ -4493,8 +4493,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_ninf_daz: @@ -4531,8 +4531,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_ninf_daz: @@ -4589,8 +4589,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_ninf_dynamic: @@ 
-4627,8 +4627,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_ninf_dynamic: @@ -4665,8 +4665,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_ninf_dynamic: @@ -4723,8 +4723,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf: @@ -4761,8 +4761,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf: @@ -4799,8 +4799,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_nnan_ninf: @@ -4857,8 +4857,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz: @@ -4895,8 +4895,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz: @@ -4933,8 +4933,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 { ; SI-GISEL-NEXT: 
v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_nnan_ninf_daz: @@ -4991,8 +4991,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic: @@ -5029,8 +5029,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic: @@ -5067,8 +5067,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_nnan_ninf_dynamic: @@ -5178,9 +5178,9 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5224,9 +5224,9 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5270,9 +5270,9 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: 
v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5322,9 +5322,9 @@ define float @v_exp10_f32_undef() { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5361,9 +5361,9 @@ define float @v_exp10_f32_undef() { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5400,9 +5400,9 @@ define float @v_exp10_f32_undef() { ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5494,9 +5494,9 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5540,11 +5540,11 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) { ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v3 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5588,11 +5588,11 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) { ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v3 ; SI-GISEL-NEXT: 
v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5662,9 +5662,9 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5712,9 +5712,9 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5766,9 +5766,9 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -5998,9 +5998,9 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -6048,9 +6048,9 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -6102,9 +6102,9 @@ define float 
@v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7120,9 +7120,9 @@ define float @v_exp10_f32_contract(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7166,9 +7166,9 @@ define float @v_exp10_f32_contract(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7212,9 +7212,9 @@ define float @v_exp10_f32_contract(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7278,9 +7278,9 @@ define float @v_exp10_f32_contract_daz(float %in) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7324,9 +7324,9 @@ define float @v_exp10_f32_contract_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; 
GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7370,9 +7370,9 @@ define float @v_exp10_f32_contract_daz(float %in) #0 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] @@ -7431,8 +7431,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf: @@ -7469,8 +7469,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf: @@ -7507,8 +7507,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_contract_nnan_ninf: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll index a18e5ace18704..e3a3ab55ef4f2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll @@ -13,60 +13,60 @@ define double @v_maximum_f64(double %src0, double %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_f64: @@ -131,60 +131,60 @@ define double @v_maximum_f64__nsz(double %src0, double %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_f64__nsz: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_f64__nsz: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 
-; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f64__nsz: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_f64__nsz: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_f64__nsz: @@ -250,10 +250,10 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_f64__nnan_src0: @@ -261,10 +261,10 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_f64__nnan_src0: @@ -272,10 +272,10 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; 
GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_f64__nnan_src0: @@ -283,11 +283,11 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) { ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f64__nnan_src0: @@ -295,9 +295,9 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_f64__nnan_src0: @@ -306,10 +306,10 @@ define double @v_maximum_f64__nnan_src0(double %arg0, double %src1) { ; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_f64__nnan_src0: @@ -334,10 +334,10 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_f64__nnan_src1: @@ -345,10 +345,10 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX8-NEXT: 
v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_f64__nnan_src1: @@ -356,10 +356,10 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX900-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_f64__nnan_src1: @@ -367,11 +367,11 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) { ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX950-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f64__nnan_src1: @@ -379,9 +379,9 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_f64__nnan_src1: @@ -390,10 +390,10 @@ define double @v_maximum_f64__nnan_src1(double %src0, double %arg1) { ; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_f64__nnan_src1: @@ -419,10 +419,10 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) { ; GFX7-NEXT: v_mov_b32_e32 v0, s18 ; GFX7-NEXT: v_mov_b32_e32 v1, s19 ; GFX7-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, s[16:17], v[0:1] ; GFX7-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use v[0:1] ; GFX7-NEXT: ;;#ASMEND @@ -434,10 +434,10 @@ define void @s_maximum_f64(double 
inreg %src0, double inreg %src1) { ; GFX8-NEXT: v_mov_b32_e32 v0, s18 ; GFX8-NEXT: v_mov_b32_e32 v1, s19 ; GFX8-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, s[16:17], v[0:1] ; GFX8-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use v[0:1] ; GFX8-NEXT: ;;#ASMEND @@ -449,10 +449,10 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) { ; GFX900-NEXT: v_mov_b32_e32 v0, s18 ; GFX900-NEXT: v_mov_b32_e32 v1, s19 ; GFX900-NEXT: v_max_f64 v[2:3], s[16:17], v[0:1] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, s[16:17], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use v[0:1] ; GFX900-NEXT: ;;#ASMEND @@ -464,10 +464,10 @@ define void @s_maximum_f64(double inreg %src0, double inreg %src1) { ; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX950-NEXT: v_max_f64 v[2:3], s[0:1], v[0:1] ; GFX950-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, s[0:1], v[0:1] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX950-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND @@ -520,85 +520,85 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX7-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v2f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v4, 
s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v2f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX900-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v2f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc ; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[6:7] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v8, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] ; GFX10-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4 +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[6:7] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v4, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v5, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_v2f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] ; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[6:7] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, 
v8, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v4, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v5, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_v2f64: @@ -670,85 +670,85 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1 ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX7-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v2f64__nsz: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX8-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v2f64__nsz: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX900-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v2f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc ; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7] +; 
GFX950-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[6:7] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v8, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f64__nsz: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] ; GFX10-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4 +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[6:7] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v4, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v5, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_v2f64__nsz: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] ; GFX11-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[6:7] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v4, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v5, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_v2f64__nsz: @@ -822,6 +822,7 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX7-NEXT: v_mov_b32_e32 v0, s22 ; GFX7-NEXT: v_mov_b32_e32 v1, s23 ; GFX7-NEXT: v_max_f64 v[2:3], s[18:19], v[0:1] +<<<<<<< HEAD ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1] ; GFX7-NEXT: v_mov_b32_e32 v0, s20 ; GFX7-NEXT: v_mov_b32_e32 v1, s21 @@ -832,6 +833,16 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v1, v5, v6, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5] +======= +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, s[18:19], v[0:1] +; GFX7-NEXT: v_max_f64 v[0:1], s[16:17], v[4:5] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], s[16:17], v[4:5] +; GFX7-NEXT: v_mov_b32_e32 v6, 0x7ff80000 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5] +>>>>>>> 1336afc5defe (update tests) ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use v[0:3] ; GFX7-NEXT: ;;#ASMEND @@ -843,6 +854,7 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX8-NEXT: v_mov_b32_e32 v0, s22 ; GFX8-NEXT: v_mov_b32_e32 v1, s23 ; GFX8-NEXT: v_max_f64 v[2:3], 
s[18:19], v[0:1] +<<<<<<< HEAD ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1] ; GFX8-NEXT: v_mov_b32_e32 v0, s20 ; GFX8-NEXT: v_mov_b32_e32 v1, s21 @@ -853,6 +865,16 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v1, v5, v6, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5] +======= +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, s[18:19], v[0:1] +; GFX8-NEXT: v_max_f64 v[0:1], s[16:17], v[4:5] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], s[16:17], v[4:5] +; GFX8-NEXT: v_mov_b32_e32 v6, 0x7ff80000 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5] +>>>>>>> 1336afc5defe (update tests) ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use v[0:3] ; GFX8-NEXT: ;;#ASMEND @@ -864,6 +886,7 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX900-NEXT: v_mov_b32_e32 v0, s22 ; GFX900-NEXT: v_mov_b32_e32 v1, s23 ; GFX900-NEXT: v_max_f64 v[2:3], s[18:19], v[0:1] +<<<<<<< HEAD ; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v0, s20 ; GFX900-NEXT: v_mov_b32_e32 v1, s21 @@ -874,6 +897,16 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX900-NEXT: v_cndmask_b32_e64 v1, v5, v6, s[4:5] ; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5] +======= +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, s[18:19], v[0:1] +; GFX900-NEXT: v_max_f64 v[0:1], s[16:17], v[4:5] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], s[16:17], v[4:5] +; GFX900-NEXT: v_mov_b32_e32 v6, 0x7ff80000 +; GFX900-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5] +>>>>>>> 1336afc5defe (update tests) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use v[0:3] ; GFX900-NEXT: ;;#ASMEND @@ -885,15 +918,15 @@ define void @s_maximum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[18:19] ; GFX950-NEXT: v_max_f64 v[2:3], s[2:3], v[0:1] ; GFX950-NEXT: v_mov_b32_e32 v6, 0x7ff80000 -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, s[2:3], v[0:1] ; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[16:17] ; GFX950-NEXT: v_max_f64 v[4:5], s[0:1], v[0:1] -; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc -; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1] +; GFX950-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, s[0:1], v[0:1] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v[0:3] ; GFX950-NEXT: ;;#ASMEND @@ -955,109 +988,109 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX7-NEXT: 
v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX7-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v3f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX8-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v3f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX900-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX900-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11] -; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v3f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; 
GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc ; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v12, v13, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v12, v7, vcc ; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[10:11] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v12, v7, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX10-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[8:9] ; GFX10-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11] -; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5 +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[10:11] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v6, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v7, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v8, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v9, s5 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_v3f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[8:9] ; GFX11-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1 +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[10:11] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v6, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v7, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v8, s1 +; 
GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v9, s1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_v3f64: @@ -1136,109 +1169,109 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1 ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX7-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX7-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v3f64__nsz: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX8-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX8-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v3f64__nsz: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX900-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX900-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11] -; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 
0, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v3f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc ; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v12, v13, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v12, v7, vcc ; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[10:11] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v12, v7, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f64__nsz: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX10-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[8:9] ; GFX10-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11] -; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5 +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[10:11] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v6, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v7, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v8, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v9, s5 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_v3f64__nsz: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX11-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[8:9] ; GFX11-NEXT: v_max_f64 v[8:9], v[4:5], v[10:11] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo -; 
GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1 +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[10:11] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v6, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v7, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v8, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v9, s1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_v3f64__nsz: @@ -1317,134 +1350,134 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX7-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX7-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13] -; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX7-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15] -; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v4f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX8-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX8-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13] -; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX8-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15] -; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX8-NEXT: v_cndmask_b32_e64 v7, 
v13, v7, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v4f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX900-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX900-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13] -; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX900-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15] -; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX900-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v4f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc ; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v16, v17, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[10:11] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v16, v9, vcc ; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[12:13] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v16, v9, vcc ; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] +; GFX950-NEXT: 
v_cmp_o_f64_e32 vcc, v[6:7], v[14:15] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v7, v16, v9, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] ; GFX10-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11] ; GFX10-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13] -; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13] +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13] ; GFX10-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15] -; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6 +; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v8, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v9, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v10, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v11, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v12, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v13, s6 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_v4f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] ; GFX11-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[10:11] ; GFX11-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13] +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[12:13] ; GFX11-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15] -; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2 +; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[6:7], v[14:15] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v8, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v9, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v10, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v11, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v12, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v13, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: 
v_maximum_v4f64: @@ -1530,134 +1563,134 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1 ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX7-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX7-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13] -; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX7-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15] -; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v4f64__nsz: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX8-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX8-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13] -; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX8-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15] -; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v4f64__nsz: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX900-NEXT: 
v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX900-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX900-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13] -; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX900-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15] -; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX900-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v4f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc ; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v16, v17, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[10:11] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v16, v9, vcc ; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[12:13] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v16, v9, vcc ; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[6:7], v[14:15] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v7, v16, v9, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f64__nsz: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] ; GFX10-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11] +; GFX10-NEXT: 
v_cmp_o_f64_e64 s4, v[2:3], v[10:11] ; GFX10-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13] -; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13] +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13] ; GFX10-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15] -; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6 +; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v8, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v9, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v10, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v11, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v12, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v13, s6 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_v4f64__nsz: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] ; GFX11-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[10:11] ; GFX11-NEXT: v_max_f64 v[10:11], v[4:5], v[12:13] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13] +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[12:13] ; GFX11-NEXT: v_max_f64 v[12:13], v[6:7], v[14:15] -; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2 +; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[6:7], v[14:15] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v8, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v9, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v10, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v11, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v12, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v13, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_v4f64__nsz: @@ -1743,14 +1776,24 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 +<<<<<<< HEAD ; GFX7-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17] ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] ; GFX7-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19] ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19] +======= +; GFX7-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[18:19] +; GFX7-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[20:21] 
+; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17] +; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[0:1], v[16:17] +>>>>>>> 1336afc5defe (update tests) ; GFX7-NEXT: v_mov_b32_e32 v34, 0x7ff80000 ; GFX7-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21] ; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21] ; GFX7-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23] +<<<<<<< HEAD ; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23] ; GFX7-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25] ; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25] @@ -1777,20 +1820,58 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31] ; GFX7-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc ; GFX7-NEXT: v_cndmask_b32_e32 v15, v17, v34, vcc +======= +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23] +; GFX7-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25] +; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25] +; GFX7-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27] +; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27] +; GFX7-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29] +; GFX7-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29] +; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v2, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v1, v34, v3, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v32, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v3, v34, v33, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v18, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v34, v19, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v20, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v7, v34, v21, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v16, s[10:11] +; GFX7-NEXT: v_cndmask_b32_e64 v9, v34, v17, s[10:11] +; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v22, s[12:13] +; GFX7-NEXT: v_cndmask_b32_e64 v11, v34, v23, s[12:13] +; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v24, s[14:15] +; GFX7-NEXT: v_cndmask_b32_e64 v13, v34, v25, s[14:15] +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[14:15], v[30:31] +; GFX7-NEXT: v_cndmask_b32_e32 v14, 0, v18, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v15, v34, v19, vcc +>>>>>>> 1336afc5defe (update tests) ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v8f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 +<<<<<<< HEAD ; GFX8-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17] ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] ; GFX8-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19] ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19] +======= +; GFX8-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[18:19] +; GFX8-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[20:21] +; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17] +; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[0:1], v[16:17] +>>>>>>> 1336afc5defe (update tests) ; GFX8-NEXT: v_mov_b32_e32 v34, 0x7ff80000 ; GFX8-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21] ; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21] ; GFX8-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23] +<<<<<<< HEAD ; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23] ; GFX8-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25] ; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25] @@ -1817,20 +1898,58 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31] ; GFX8-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v15, 
v17, v34, vcc +======= +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23] +; GFX8-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25] +; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25] +; GFX8-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27] +; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27] +; GFX8-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29] +; GFX8-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29] +; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v2, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v34, v3, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v32, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v34, v33, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v18, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v34, v19, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v20, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v7, v34, v21, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v16, s[10:11] +; GFX8-NEXT: v_cndmask_b32_e64 v9, v34, v17, s[10:11] +; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v22, s[12:13] +; GFX8-NEXT: v_cndmask_b32_e64 v11, v34, v23, s[12:13] +; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v24, s[14:15] +; GFX8-NEXT: v_cndmask_b32_e64 v13, v34, v25, s[14:15] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[14:15], v[30:31] +; GFX8-NEXT: v_cndmask_b32_e32 v14, 0, v18, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v15, v34, v19, vcc +>>>>>>> 1336afc5defe (update tests) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v8f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 +<<<<<<< HEAD ; GFX900-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17] ; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] ; GFX900-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19] ; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19] +======= +; GFX900-NEXT: v_max_f64 v[32:33], v[2:3], v[18:19] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[18:19] +; GFX900-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[20:21] +; GFX900-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17] +; GFX900-NEXT: v_cmp_o_f64_e64 s[8:9], v[0:1], v[16:17] +>>>>>>> 1336afc5defe (update tests) ; GFX900-NEXT: v_mov_b32_e32 v34, 0x7ff80000 ; GFX900-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21] ; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21] ; GFX900-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23] +<<<<<<< HEAD ; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23] ; GFX900-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25] ; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25] @@ -1857,6 +1976,34 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31] ; GFX900-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v34, vcc +======= +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23] +; GFX900-NEXT: v_max_f64 v[16:17], v[8:9], v[24:25] +; GFX900-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25] +; GFX900-NEXT: v_max_f64 v[22:23], v[10:11], v[26:27] +; GFX900-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27] +; GFX900-NEXT: v_max_f64 v[24:25], v[12:13], v[28:29] +; GFX900-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29] +; GFX900-NEXT: v_cndmask_b32_e64 v0, 0, v2, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e64 v1, v34, v3, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e32 v2, 0, v32, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v3, v34, v33, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v18, s[4:5] +; GFX900-NEXT: 
v_cndmask_b32_e64 v5, v34, v19, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v6, 0, v20, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v7, v34, v21, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v8, 0, v16, s[10:11] +; GFX900-NEXT: v_cndmask_b32_e64 v9, v34, v17, s[10:11] +; GFX900-NEXT: v_cndmask_b32_e64 v10, 0, v22, s[12:13] +; GFX900-NEXT: v_cndmask_b32_e64 v11, v34, v23, s[12:13] +; GFX900-NEXT: v_cndmask_b32_e64 v12, 0, v24, s[14:15] +; GFX900-NEXT: v_cndmask_b32_e64 v13, v34, v25, s[14:15] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: v_max_f64 v[18:19], v[14:15], v[30:31] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[14:15], v[30:31] +; GFX900-NEXT: v_cndmask_b32_e32 v14, 0, v18, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v15, v34, v19, vcc +>>>>>>> 1336afc5defe (update tests) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v8f64: @@ -1865,42 +2012,42 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000 ; GFX950-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[16:17] ; GFX950-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19] ; GFX950-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21] -; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19] +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v32, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v54, v33, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[18:19] ; GFX950-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23] ; GFX950-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25] -; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21] +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v34, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v54, v35, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[20:21] ; GFX950-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27] ; GFX950-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29] -; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23] +; GFX950-NEXT: v_cndmask_b32_e32 v4, 0, v36, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v54, v37, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[6:7], v[22:23] ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31] -; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25] +; GFX950-NEXT: v_cndmask_b32_e32 v6, 0, v38, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v7, v54, v39, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[8:9], v[24:25] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27] +; GFX950-NEXT: v_cndmask_b32_e32 v8, 0, v48, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v9, v54, v49, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[10:11], v[26:27] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29] +; GFX950-NEXT: v_cndmask_b32_e32 v10, 0, v50, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v11, v54, v51, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[12:13], v[28:29] ; GFX950-NEXT: 
s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31] +; GFX950-NEXT: v_cndmask_b32_e32 v12, 0, v52, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v13, v54, v53, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[14:15], v[30:31] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v14, 0, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v15, v54, v17, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v8f64: @@ -1908,38 +2055,38 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17] ; GFX10-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[18:19] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[18:19] ; GFX10-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21] -; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[20:21] +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[20:21] ; GFX10-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23] -; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[22:23] +; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[22:23] ; GFX10-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25] -; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[8:9], v[24:25] +; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[8:9], v[24:25] ; GFX10-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27] -; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[10:11], v[26:27] +; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[10:11], v[26:27] ; GFX10-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29] -; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[28:29] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v16, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v20, 0, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v22, 0, s7 -; GFX10-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7 -; GFX10-NEXT: v_cndmask_b32_e64 v10, v24, 0, s8 -; GFX10-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8 -; GFX10-NEXT: v_cndmask_b32_e64 v12, v26, 0, s9 -; GFX10-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9 +; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[12:13], v[28:29] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v32, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v33, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v16, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v17, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v18, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v19, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v20, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v21, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v22, s7 +; GFX10-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v23, s7 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v24, s8 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v25, s8 +; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, v26, s9 +; GFX10-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v27, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_max_f64 v[28:29], v[14:15], v[30:31] -; GFX10-NEXT: 
v_cmp_u_f64_e64 s10, v[14:15], v[30:31] -; GFX10-NEXT: v_cndmask_b32_e64 v14, v28, 0, s10 -; GFX10-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10 +; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[14:15], v[30:31] +; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, v28, s10 +; GFX10-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v29, s10 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_v8f64: @@ -1947,39 +2094,39 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17] ; GFX11-NEXT: v_max_f64 v[16:17], v[2:3], v[18:19] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[18:19] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[18:19] ; GFX11-NEXT: v_max_f64 v[18:19], v[4:5], v[20:21] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[20:21] +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[20:21] ; GFX11-NEXT: v_max_f64 v[20:21], v[6:7], v[22:23] -; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[22:23] +; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[6:7], v[22:23] ; GFX11-NEXT: v_max_f64 v[22:23], v[8:9], v[24:25] -; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[24:25] +; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[8:9], v[24:25] ; GFX11-NEXT: v_max_f64 v[24:25], v[10:11], v[26:27] -; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[26:27] +; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[10:11], v[26:27] ; GFX11-NEXT: v_max_f64 v[26:27], v[12:13], v[28:29] -; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[28:29] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v16, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v18, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v6, v20, 0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v8, v22, 0, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v10, v24, 0, s4 -; GFX11-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4 -; GFX11-NEXT: v_cndmask_b32_e64 v12, v26, 0, s5 -; GFX11-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5 +; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[12:13], v[28:29] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v32, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v33, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v16, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v17, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v18, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v19, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v20, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v21, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v22, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v23, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v24, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v25, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v12, 0, v26, s5 +; GFX11-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v27, s5 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_max_f64 v[28:29], v[14:15], v[30:31] -; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[30:31] +; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[14:15], v[30:31] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v14, v28, 0, s6 -; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 
0x7ff80000, s6 +; GFX11-NEXT: v_cndmask_b32_e64 v14, 0, v28, s6 +; GFX11-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v29, s6 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_v8f64: @@ -2011,117 +2158,117 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[31:32] ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32] ; GFX7-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 -; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32] ; GFX7-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 -; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[6:7] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32] ; GFX7-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40 -; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[8:9] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32] ; GFX7-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 -; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11] +; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[10:11] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32] ; GFX7-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 -; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13] +; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[12:13] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32] ; GFX7-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 -; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15] +; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[14:15] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32] ; GFX7-NEXT: v_max_f64 v[14:15], 
v[14:15], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72 -; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17] +; GFX7-NEXT: v_cndmask_b32_e64 v14, 0, v14, s[16:17] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32] ; GFX7-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 -; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19] +; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[18:19] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32] ; GFX7-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 -; GFX7-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21] +; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[20:21] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32] ; GFX7-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 -; GFX7-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23] +; GFX7-NEXT: v_cndmask_b32_e64 v20, 0, v20, s[22:23] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32] ; GFX7-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104 -; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25] +; GFX7-NEXT: v_cndmask_b32_e64 v22, 0, v22, s[24:25] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32] ; GFX7-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 -; GFX7-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27] +; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, v24, s[26:27] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32] ; GFX7-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 -; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] +; GFX7-NEXT: v_cndmask_b32_e64 v26, 0, v26, s[28:29] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX7-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] +; GFX7-NEXT: v_cndmask_b32_e64 v28, 0, v28, s[40:41] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] +; GFX7-NEXT: v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX7-NEXT: v_max_f64 v[30:31], 
v[30:31], v[32:33] ; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9] -; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11] -; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13] -; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15] -; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17] -; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19] -; GFX7-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21] -; GFX7-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23] -; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] -; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] -; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] -; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] -; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v32, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v3, v32, v3, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v32, v5, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v7, v32, v7, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v9, v32, v9, s[10:11] +; GFX7-NEXT: v_cndmask_b32_e64 v11, v32, v11, s[12:13] +; GFX7-NEXT: v_cndmask_b32_e64 v13, v32, v13, s[14:15] +; GFX7-NEXT: v_cndmask_b32_e64 v15, v32, v15, s[16:17] +; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v17, s[18:19] +; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v19, s[20:21] +; GFX7-NEXT: v_cndmask_b32_e64 v21, v32, v21, s[22:23] +; GFX7-NEXT: v_cndmask_b32_e64 v23, v32, v23, s[24:25] +; GFX7-NEXT: v_cndmask_b32_e64 v25, v32, v25, s[26:27] +; GFX7-NEXT: v_cndmask_b32_e64 v27, v32, v27, s[28:29] +; GFX7-NEXT: v_cndmask_b32_e64 v29, v32, v29, s[40:41] +; GFX7-NEXT: v_cndmask_b32_e64 v31, v32, v31, s[42:43] +; GFX7-NEXT: v_cndmask_b32_e64 v30, 0, v30, s[42:43] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v16f64: @@ -2130,117 +2277,117 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[31:32] ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32] ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 -; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32] ; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[6:7] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32] +; 
GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32] ; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40 -; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[8:9] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32] ; GFX8-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 -; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11] +; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[10:11] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32] ; GFX8-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 -; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13] +; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[12:13] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32] ; GFX8-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 -; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15] +; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[14:15] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32] ; GFX8-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72 -; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17] +; GFX8-NEXT: v_cndmask_b32_e64 v14, 0, v14, s[16:17] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32] ; GFX8-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 -; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19] +; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[18:19] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32] ; GFX8-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 -; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21] +; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[20:21] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32] ; GFX8-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 -; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23] +; GFX8-NEXT: v_cndmask_b32_e64 v20, 0, v20, s[22:23] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32] ; GFX8-NEXT: v_max_f64 v[22:23], 
v[22:23], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104 -; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25] +; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, v22, s[24:25] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32] ; GFX8-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 -; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27] +; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, v24, s[26:27] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32] ; GFX8-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 -; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] +; GFX8-NEXT: v_cndmask_b32_e64 v26, 0, v26, s[28:29] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX8-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v28, 0, v28, s[40:41] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] +; GFX8-NEXT: v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX8-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33] ; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9] -; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11] -; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13] -; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15] -; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17] -; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19] -; GFX8-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21] -; GFX8-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23] -; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] -; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] -; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] -; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] -; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v32, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v3, v32, v3, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v32, v5, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v7, v32, v7, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v9, v32, v9, s[10:11] +; GFX8-NEXT: v_cndmask_b32_e64 v11, v32, v11, s[12:13] +; GFX8-NEXT: v_cndmask_b32_e64 v13, v32, v13, s[14:15] +; GFX8-NEXT: v_cndmask_b32_e64 v15, v32, v15, s[16:17] +; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v17, s[18:19] +; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v19, s[20:21] +; GFX8-NEXT: v_cndmask_b32_e64 v21, v32, v21, s[22:23] +; GFX8-NEXT: v_cndmask_b32_e64 v23, v32, v23, s[24:25] +; GFX8-NEXT: v_cndmask_b32_e64 v25, v32, v25, s[26:27] +; GFX8-NEXT: v_cndmask_b32_e64 v27, v32, v27, 
s[28:29] +; GFX8-NEXT: v_cndmask_b32_e64 v29, v32, v29, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v31, v32, v31, s[42:43] +; GFX8-NEXT: v_cndmask_b32_e64 v30, 0, v30, s[42:43] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v16f64: @@ -2249,117 +2396,117 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[31:32] ; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32] ; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 -; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32] ; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 -; GFX900-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[6:7] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32] ; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40 -; GFX900-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[8:9] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32] ; GFX900-NEXT: v_max_f64 v[8:9], v[8:9], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 -; GFX900-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11] +; GFX900-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[10:11] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32] ; GFX900-NEXT: v_max_f64 v[10:11], v[10:11], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 -; GFX900-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13] +; GFX900-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[12:13] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32] ; GFX900-NEXT: v_max_f64 v[12:13], v[12:13], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 -; GFX900-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15] +; GFX900-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[14:15] ; GFX900-NEXT: 
s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32] ; GFX900-NEXT: v_max_f64 v[14:15], v[14:15], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72 -; GFX900-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17] +; GFX900-NEXT: v_cndmask_b32_e64 v14, 0, v14, s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32] ; GFX900-NEXT: v_max_f64 v[16:17], v[16:17], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 -; GFX900-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19] +; GFX900-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[18:19] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32] ; GFX900-NEXT: v_max_f64 v[18:19], v[18:19], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 -; GFX900-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21] +; GFX900-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[20:21] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32] ; GFX900-NEXT: v_max_f64 v[20:21], v[20:21], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 -; GFX900-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23] +; GFX900-NEXT: v_cndmask_b32_e64 v20, 0, v20, s[22:23] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32] ; GFX900-NEXT: v_max_f64 v[22:23], v[22:23], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104 -; GFX900-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25] +; GFX900-NEXT: v_cndmask_b32_e64 v22, 0, v22, s[24:25] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32] ; GFX900-NEXT: v_max_f64 v[24:25], v[24:25], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 -; GFX900-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27] +; GFX900-NEXT: v_cndmask_b32_e64 v24, 0, v24, s[26:27] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32] ; GFX900-NEXT: v_max_f64 v[26:27], v[26:27], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 -; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] +; GFX900-NEXT: v_cndmask_b32_e64 v26, 0, v26, s[28:29] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX900-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; 
GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] +; GFX900-NEXT: v_cndmask_b32_e64 v28, 0, v28, s[40:41] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] +; GFX900-NEXT: v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX900-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33] ; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9] -; GFX900-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11] -; GFX900-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13] -; GFX900-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15] -; GFX900-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17] -; GFX900-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19] -; GFX900-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21] -; GFX900-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23] -; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] -; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] -; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] -; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] -; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] +; GFX900-NEXT: v_cndmask_b32_e32 v1, v32, v1, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v3, v32, v3, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v5, v32, v5, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v7, v32, v7, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e64 v9, v32, v9, s[10:11] +; GFX900-NEXT: v_cndmask_b32_e64 v11, v32, v11, s[12:13] +; GFX900-NEXT: v_cndmask_b32_e64 v13, v32, v13, s[14:15] +; GFX900-NEXT: v_cndmask_b32_e64 v15, v32, v15, s[16:17] +; GFX900-NEXT: v_cndmask_b32_e64 v17, v32, v17, s[18:19] +; GFX900-NEXT: v_cndmask_b32_e64 v19, v32, v19, s[20:21] +; GFX900-NEXT: v_cndmask_b32_e64 v21, v32, v21, s[22:23] +; GFX900-NEXT: v_cndmask_b32_e64 v23, v32, v23, s[24:25] +; GFX900-NEXT: v_cndmask_b32_e64 v25, v32, v25, s[26:27] +; GFX900-NEXT: v_cndmask_b32_e64 v27, v32, v27, s[28:29] +; GFX900-NEXT: v_cndmask_b32_e64 v29, v32, v29, s[40:41] +; GFX900-NEXT: v_cndmask_b32_e64 v31, v32, v31, s[42:43] +; GFX900-NEXT: v_cndmask_b32_e64 v30, 0, v30, s[42:43] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v16f64: @@ -2409,6 +2556,7 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: s_waitcnt vmcnt(25) +<<<<<<< HEAD ; GFX950-NEXT: v_max_f64 v[58:59], v[0:1], v[32:33] ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33] ; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:112 @@ -2517,6 +2665,115 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse +======= +; GFX950-NEXT: v_max_f64 v[58:59], v[2:3], v[36:37] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[36:37] +; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:112 +; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:108 +; GFX950-NEXT: s_waitcnt vmcnt(25) +; GFX950-NEXT: v_max_f64 v[60:61], v[4:5], v[38:39] +; GFX950-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[38:39] +; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:120 +; GFX950-NEXT: scratch_load_dword 
v38, off, s32 offset:116 +; GFX950-NEXT: s_waitcnt vmcnt(25) +; GFX950-NEXT: v_max_f64 v[62:63], v[6:7], v[48:49] +; GFX950-NEXT: v_cmp_o_f64_e64 s[2:3], v[6:7], v[48:49] +; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:128 +; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:124 +; GFX950-NEXT: s_waitcnt vmcnt(25) +; GFX950-NEXT: v_max_f64 v[2:3], v[0:1], v[56:57] +; GFX950-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[56:57] +; GFX950-NEXT: v_mov_b32_e32 v0, 0x7ff80000 +; GFX950-NEXT: s_waitcnt vmcnt(23) +; GFX950-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47] +; GFX950-NEXT: v_cndmask_b32_e64 v1, 0, v2, s[4:5] +; GFX950-NEXT: v_accvgpr_write_b32 a0, v1 +; GFX950-NEXT: v_cndmask_b32_e64 v1, v0, v3, s[4:5] +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v58, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v0, v59, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[8:9], v[46:47] +; GFX950-NEXT: s_waitcnt vmcnt(21) +; GFX950-NEXT: v_max_f64 v[46:47], v[10:11], v[44:45] +; GFX950-NEXT: v_cndmask_b32_e64 v4, 0, v60, s[0:1] +; GFX950-NEXT: v_cndmask_b32_e32 v8, 0, v56, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v9, v0, v57, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[10:11], v[44:45] +; GFX950-NEXT: s_waitcnt vmcnt(19) +; GFX950-NEXT: v_max_f64 v[44:45], v[12:13], v[42:43] +; GFX950-NEXT: v_cndmask_b32_e64 v5, v0, v61, s[0:1] +; GFX950-NEXT: v_cndmask_b32_e32 v10, 0, v46, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v11, v0, v47, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[12:13], v[42:43] +; GFX950-NEXT: s_waitcnt vmcnt(17) +; GFX950-NEXT: v_max_f64 v[42:43], v[14:15], v[40:41] +; GFX950-NEXT: v_cndmask_b32_e64 v6, 0, v62, s[2:3] +; GFX950-NEXT: v_cndmask_b32_e32 v12, 0, v44, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v13, v0, v45, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[14:15], v[40:41] +; GFX950-NEXT: s_waitcnt vmcnt(15) +; GFX950-NEXT: v_max_f64 v[40:41], v[16:17], v[54:55] +; GFX950-NEXT: v_cndmask_b32_e64 v7, v0, v63, s[2:3] +; GFX950-NEXT: v_cndmask_b32_e32 v14, 0, v42, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v15, v0, v43, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[16:17], v[54:55] +; GFX950-NEXT: s_waitcnt vmcnt(13) +; GFX950-NEXT: v_max_f64 v[54:55], v[18:19], v[52:53] +; GFX950-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v16, 0, v40, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v17, v0, v41, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[18:19], v[52:53] +; GFX950-NEXT: s_waitcnt vmcnt(11) +; GFX950-NEXT: v_max_f64 v[52:53], v[20:21], v[50:51] +; GFX950-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v18, 0, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v19, v0, v55, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[20:21], v[50:51] +; GFX950-NEXT: s_waitcnt vmcnt(9) +; GFX950-NEXT: v_max_f64 v[50:51], v[22:23], v[34:35] +; GFX950-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v20, 0, v52, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v21, v0, v53, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[22:23], v[34:35] +; GFX950-NEXT: s_waitcnt vmcnt(6) +; GFX950-NEXT: v_max_f64 v[34:35], v[24:25], v[32:33] +; GFX950-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v22, 0, v50, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v23, v0, v51, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[24:25], v[32:33] +; GFX950-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v24, 0, v34, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v25, v0, v35, vcc +; GFX950-NEXT: 
v_accvgpr_read_b32 v57, a10 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse +; GFX950-NEXT: s_waitcnt vmcnt(4) +; GFX950-NEXT: v_max_f64 v[32:33], v[26:27], v[36:37] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[26:27], v[36:37] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v26, 0, v32, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v27, v0, v33, vcc +; GFX950-NEXT: s_waitcnt vmcnt(2) +; GFX950-NEXT: v_max_f64 v[32:33], v[28:29], v[38:39] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[28:29], v[38:39] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v28, 0, v32, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v29, v0, v33, vcc +; GFX950-NEXT: s_waitcnt vmcnt(0) +; GFX950-NEXT: v_max_f64 v[32:33], v[30:31], v[48:49] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[30:31], v[48:49] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v30, 0, v32, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v31, v0, v33, vcc +; GFX950-NEXT: v_accvgpr_read_b32 v0, a0 +>>>>>>> 1336afc5defe (update tests) ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v16f64: @@ -2550,17 +2807,26 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:72 ; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:104 ; GFX10-NEXT: s_waitcnt vmcnt(24) +<<<<<<< HEAD ; GFX10-NEXT: v_max_f64 v[82:83], v[0:1], v[31:32] ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[31:32] ; GFX10-NEXT: s_waitcnt vmcnt(22) ; GFX10-NEXT: v_max_f64 v[84:85], v[2:3], v[33:34] ; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[33:34] +======= +; GFX10-NEXT: v_max_f64 v[82:83], v[2:3], v[31:32] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[2:3], v[31:32] +; GFX10-NEXT: s_waitcnt vmcnt(22) +; GFX10-NEXT: v_max_f64 v[84:85], v[4:5], v[33:34] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[4:5], v[33:34] +>>>>>>> 1336afc5defe (update tests) ; GFX10-NEXT: s_clause 0x3 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108 ; GFX10-NEXT: s_waitcnt vmcnt(24) +<<<<<<< HEAD ; GFX10-NEXT: v_max_f64 v[32:33], v[4:5], v[35:36] ; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[35:36] ; GFX10-NEXT: s_clause 0x2 @@ -2585,9 +2851,36 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX10-NEXT: s_waitcnt vmcnt(11) ; GFX10-NEXT: v_max_f64 v[64:65], v[20:21], v[70:71] ; GFX10-NEXT: v_cmp_u_f64_e64 s13, v[20:21], v[70:71] +======= +; GFX10-NEXT: v_max_f64 v[32:33], v[6:7], v[35:36] +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[6:7], v[35:36] +; GFX10-NEXT: s_clause 0x2 +; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:128 +; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:124 +; GFX10-NEXT: s_waitcnt vmcnt(23) +; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[14:15], v[50:51] +; GFX10-NEXT: s_waitcnt vmcnt(21) +; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[12:13], 
v[52:53] +; GFX10-NEXT: s_waitcnt vmcnt(19) +; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[10:11], v[54:55] +; GFX10-NEXT: s_waitcnt vmcnt(18) +; GFX10-NEXT: v_max_f64 v[34:35], v[8:9], v[37:38] +; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[8:9], v[37:38] +; GFX10-NEXT: s_waitcnt vmcnt(16) +; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[64:65] +; GFX10-NEXT: v_max_f64 v[36:37], v[10:11], v[54:55] +; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[0:1], v[64:65] +; GFX10-NEXT: v_max_f64 v[38:39], v[12:13], v[52:53] +; GFX10-NEXT: v_max_f64 v[52:53], v[14:15], v[50:51] +; GFX10-NEXT: s_waitcnt vmcnt(11) +; GFX10-NEXT: v_max_f64 v[54:55], v[20:21], v[70:71] +; GFX10-NEXT: v_cmp_o_f64_e64 s13, v[20:21], v[70:71] +>>>>>>> 1336afc5defe (update tests) ; GFX10-NEXT: s_waitcnt vmcnt(9) -; GFX10-NEXT: v_cmp_u_f64_e64 s12, v[18:19], v[80:81] +; GFX10-NEXT: v_cmp_o_f64_e64 s12, v[18:19], v[80:81] ; GFX10-NEXT: s_waitcnt vmcnt(8) +<<<<<<< HEAD ; GFX10-NEXT: v_max_f64 v[52:53], v[16:17], v[50:51] ; GFX10-NEXT: v_cmp_u_f64_e64 s11, v[16:17], v[50:51] ; GFX10-NEXT: v_max_f64 v[50:51], v[18:19], v[80:81] @@ -2637,6 +2930,57 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX10-NEXT: v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16 ; GFX10-NEXT: v_cndmask_b32_e64 v30, v86, 0, s18 ; GFX10-NEXT: v_cndmask_b32_e64 v31, v87, 0x7ff80000, s18 +======= +; GFX10-NEXT: v_max_f64 v[50:51], v[16:17], v[48:49] +; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[16:17], v[48:49] +; GFX10-NEXT: v_max_f64 v[48:49], v[18:19], v[80:81] +; GFX10-NEXT: v_max_f64 v[64:65], v[22:23], v[68:69] +; GFX10-NEXT: v_cmp_o_f64_e64 s14, v[22:23], v[68:69] +; GFX10-NEXT: s_waitcnt vmcnt(7) +; GFX10-NEXT: v_max_f64 v[68:69], v[24:25], v[66:67] +; GFX10-NEXT: v_cmp_o_f64_e64 s15, v[24:25], v[66:67] +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v36, s7 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v8, s8 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v9, s8 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v34, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v35, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v37, s7 +; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, v38, s9 +; GFX10-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v39, s9 +; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, v52, s10 +; GFX10-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v53, s10 +; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, v50, s11 +; GFX10-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v51, s11 +; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, v48, s12 +; GFX10-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v49, s12 +; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, v54, s13 +; GFX10-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v55, s13 +; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, v64, s14 +; GFX10-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v65, s14 +; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, v68, s15 +; GFX10-NEXT: v_cndmask_b32_e64 v25, 0x7ff80000, v69, s15 +; GFX10-NEXT: s_waitcnt vmcnt(5) +; GFX10-NEXT: v_max_f64 v[70:71], v[28:29], v[2:3] +; GFX10-NEXT: v_cmp_o_f64_e64 s17, v[28:29], v[2:3] +; GFX10-NEXT: s_waitcnt vmcnt(3) +; GFX10-NEXT: v_max_f64 v[66:67], v[26:27], v[4:5] +; GFX10-NEXT: v_cmp_o_f64_e64 s16, v[26:27], v[4:5] +; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v82, vcc_lo +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_max_f64 v[80:81], v[30:31], v[6:7] +; GFX10-NEXT: v_cmp_o_f64_e64 s18, v[30:31], v[6:7] +; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7ff80000, v83, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v84, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v85, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v32, s5 +; GFX10-NEXT: v_cndmask_b32_e64 
v7, 0x7ff80000, v33, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v28, 0, v70, s17 +; GFX10-NEXT: v_cndmask_b32_e64 v29, 0x7ff80000, v71, s17 +; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, v66, s16 +; GFX10-NEXT: v_cndmask_b32_e64 v27, 0x7ff80000, v67, s16 +; GFX10-NEXT: v_cndmask_b32_e64 v30, 0, v80, s18 +; GFX10-NEXT: v_cndmask_b32_e64 v31, 0x7ff80000, v81, s18 +>>>>>>> 1336afc5defe (update tests) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_maximum_v16f64: @@ -2678,84 +3022,84 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:124 ; GFX11-NEXT: s_waitcnt vmcnt(30) ; GFX11-NEXT: v_max_f64 v[96:97], v[0:1], v[32:33] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[32:33] ; GFX11-NEXT: s_waitcnt vmcnt(28) ; GFX11-NEXT: v_max_f64 v[32:33], v[2:3], v[34:35] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[34:35] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[34:35] ; GFX11-NEXT: s_waitcnt vmcnt(26) ; GFX11-NEXT: v_max_f64 v[34:35], v[4:5], v[36:37] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[36:37] +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[36:37] ; GFX11-NEXT: s_waitcnt vmcnt(24) ; GFX11-NEXT: v_max_f64 v[36:37], v[6:7], v[38:39] -; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[38:39] +; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[6:7], v[38:39] ; GFX11-NEXT: s_waitcnt vmcnt(22) ; GFX11-NEXT: v_max_f64 v[38:39], v[8:9], v[48:49] -; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[48:49] +; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[8:9], v[48:49] ; GFX11-NEXT: s_waitcnt vmcnt(20) ; GFX11-NEXT: v_max_f64 v[48:49], v[10:11], v[50:51] -; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[50:51] +; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[10:11], v[50:51] ; GFX11-NEXT: s_waitcnt vmcnt(18) ; GFX11-NEXT: v_max_f64 v[50:51], v[12:13], v[52:53] -; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[52:53] +; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[12:13], v[52:53] ; GFX11-NEXT: s_waitcnt vmcnt(16) ; GFX11-NEXT: v_max_f64 v[52:53], v[14:15], v[54:55] -; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[54:55] +; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[14:15], v[54:55] ; GFX11-NEXT: s_waitcnt vmcnt(14) ; GFX11-NEXT: v_max_f64 v[54:55], v[16:17], v[64:65] -; GFX11-NEXT: v_cmp_u_f64_e64 s7, v[16:17], v[64:65] +; GFX11-NEXT: v_cmp_o_f64_e64 s7, v[16:17], v[64:65] ; GFX11-NEXT: s_waitcnt vmcnt(12) ; GFX11-NEXT: v_max_f64 v[64:65], v[18:19], v[66:67] -; GFX11-NEXT: v_cmp_u_f64_e64 s8, v[18:19], v[66:67] +; GFX11-NEXT: v_cmp_o_f64_e64 s8, v[18:19], v[66:67] ; GFX11-NEXT: s_waitcnt vmcnt(10) ; GFX11-NEXT: v_max_f64 v[66:67], v[20:21], v[68:69] -; GFX11-NEXT: v_cmp_u_f64_e64 s9, v[20:21], v[68:69] +; GFX11-NEXT: v_cmp_o_f64_e64 s9, v[20:21], v[68:69] ; GFX11-NEXT: s_waitcnt vmcnt(8) ; GFX11-NEXT: v_max_f64 v[68:69], v[22:23], v[70:71] -; GFX11-NEXT: v_cmp_u_f64_e64 s10, v[22:23], v[70:71] +; GFX11-NEXT: v_cmp_o_f64_e64 s10, v[22:23], v[70:71] ; GFX11-NEXT: s_waitcnt vmcnt(6) ; GFX11-NEXT: v_max_f64 v[70:71], v[24:25], v[80:81] -; GFX11-NEXT: v_cmp_u_f64_e64 s11, v[24:25], v[80:81] +; GFX11-NEXT: v_cmp_o_f64_e64 s11, v[24:25], v[80:81] ; GFX11-NEXT: s_waitcnt vmcnt(4) ; GFX11-NEXT: v_max_f64 v[80:81], v[26:27], v[82:83] -; GFX11-NEXT: v_cmp_u_f64_e64 s12, v[26:27], v[82:83] +; GFX11-NEXT: v_cmp_o_f64_e64 s12, v[26:27], v[82:83] ; GFX11-NEXT: s_waitcnt vmcnt(2) ; GFX11-NEXT: v_max_f64 v[82:83], v[28:29], v[84:85] -; GFX11-NEXT: v_cmp_u_f64_e64 s13, v[28:29], v[84:85] +; GFX11-NEXT: v_cmp_o_f64_e64 s13, v[28:29], v[84:85] ; GFX11-NEXT: s_waitcnt 
vmcnt(0) ; GFX11-NEXT: v_max_f64 v[84:85], v[30:31], v[86:87] -; GFX11-NEXT: v_cmp_u_f64_e64 s14, v[30:31], v[86:87] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v32, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v34, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v6, v36, 0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v8, v38, 0, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v10, v48, 0, s4 -; GFX11-NEXT: v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4 -; GFX11-NEXT: v_cndmask_b32_e64 v12, v50, 0, s5 -; GFX11-NEXT: v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5 -; GFX11-NEXT: v_cndmask_b32_e64 v14, v52, 0, s6 -; GFX11-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6 -; GFX11-NEXT: v_cndmask_b32_e64 v16, v54, 0, s7 -; GFX11-NEXT: v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7 -; GFX11-NEXT: v_cndmask_b32_e64 v18, v64, 0, s8 -; GFX11-NEXT: v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8 -; GFX11-NEXT: v_cndmask_b32_e64 v20, v66, 0, s9 -; GFX11-NEXT: v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9 -; GFX11-NEXT: v_cndmask_b32_e64 v22, v68, 0, s10 -; GFX11-NEXT: v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10 -; GFX11-NEXT: v_cndmask_b32_e64 v24, v70, 0, s11 -; GFX11-NEXT: v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11 -; GFX11-NEXT: v_cndmask_b32_e64 v26, v80, 0, s12 -; GFX11-NEXT: v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12 -; GFX11-NEXT: v_cndmask_b32_e64 v28, v82, 0, s13 -; GFX11-NEXT: v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13 -; GFX11-NEXT: v_cndmask_b32_e64 v30, v84, 0, s14 -; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14 +; GFX11-NEXT: v_cmp_o_f64_e64 s14, v[30:31], v[86:87] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v96, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v97, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v32, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v33, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v34, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v35, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v36, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v37, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v38, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v39, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v48, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v49, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v12, 0, v50, s5 +; GFX11-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v51, s5 +; GFX11-NEXT: v_cndmask_b32_e64 v14, 0, v52, s6 +; GFX11-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v53, s6 +; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, v54, s7 +; GFX11-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v55, s7 +; GFX11-NEXT: v_cndmask_b32_e64 v18, 0, v64, s8 +; GFX11-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v65, s8 +; GFX11-NEXT: v_cndmask_b32_e64 v20, 0, v66, s9 +; GFX11-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v67, s9 +; GFX11-NEXT: v_cndmask_b32_e64 v22, 0, v68, s10 +; GFX11-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v69, s10 +; GFX11-NEXT: v_cndmask_b32_e64 v24, 0, v70, s11 +; GFX11-NEXT: v_cndmask_b32_e64 v25, 0x7ff80000, v71, s11 +; GFX11-NEXT: v_cndmask_b32_e64 v26, 0, v80, s12 +; GFX11-NEXT: v_cndmask_b32_e64 v27, 0x7ff80000, v81, s12 +; GFX11-NEXT: v_cndmask_b32_e64 v28, 0, v82, s13 +; GFX11-NEXT: v_cndmask_b32_e64 v29, 0x7ff80000, v83, s13 +; GFX11-NEXT: v_cndmask_b32_e64 v30, 0, v84, s14 
+; GFX11-NEXT: v_cndmask_b32_e64 v31, 0x7ff80000, v85, s14 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_maximum_v16f64: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll index 4c413af878462..30ad0f12da18b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll @@ -13,60 +13,60 @@ define double @v_minimum_f64(double %src0, double %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 
v0, 0, v4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_f64: @@ -131,60 +131,60 @@ define double @v_minimum_f64__nsz(double %src0, double %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_f64__nsz: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_f64__nsz: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f64__nsz: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_f64__nsz: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_f64__nsz: @@ -250,10 +250,10 @@ define double 
@v_minimum_f64__nnan_src0(double %arg0, double %src1) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX7-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_f64__nnan_src0: @@ -261,10 +261,10 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX8-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_f64__nnan_src0: @@ -272,10 +272,10 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_f64__nnan_src0: @@ -283,11 +283,11 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) { ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f64__nnan_src0: @@ -295,9 +295,9 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_f64__nnan_src0: @@ -306,10 +306,10 @@ define double @v_minimum_f64__nnan_src0(double %arg0, double %src1) { ; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 
vcc_lo, v[0:1], v[2:3] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_f64__nnan_src0: @@ -334,10 +334,10 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX7-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_f64__nnan_src1: @@ -345,10 +345,10 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX8-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_f64__nnan_src1: @@ -356,10 +356,10 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX900-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX900-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_f64__nnan_src1: @@ -367,11 +367,11 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) { ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX950-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f64__nnan_src1: @@ -379,9 +379,9 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX10-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 
0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_f64__nnan_src1: @@ -390,10 +390,10 @@ define double @v_minimum_f64__nnan_src1(double %src0, double %arg1) { ; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], 1.0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_f64__nnan_src1: @@ -419,10 +419,10 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) { ; GFX7-NEXT: v_mov_b32_e32 v0, s18 ; GFX7-NEXT: v_mov_b32_e32 v1, s19 ; GFX7-NEXT: v_min_f64 v[2:3], s[16:17], v[0:1] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, s[16:17], v[0:1] ; GFX7-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use v[0:1] ; GFX7-NEXT: ;;#ASMEND @@ -434,10 +434,10 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) { ; GFX8-NEXT: v_mov_b32_e32 v0, s18 ; GFX8-NEXT: v_mov_b32_e32 v1, s19 ; GFX8-NEXT: v_min_f64 v[2:3], s[16:17], v[0:1] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, s[16:17], v[0:1] ; GFX8-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use v[0:1] ; GFX8-NEXT: ;;#ASMEND @@ -449,10 +449,10 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) { ; GFX900-NEXT: v_mov_b32_e32 v0, s18 ; GFX900-NEXT: v_mov_b32_e32 v1, s19 ; GFX900-NEXT: v_min_f64 v[2:3], s[16:17], v[0:1] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[16:17], v[0:1] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, s[16:17], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use v[0:1] ; GFX900-NEXT: ;;#ASMEND @@ -464,10 +464,10 @@ define void @s_minimum_f64(double inreg %src0, double inreg %src1) { ; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX950-NEXT: v_min_f64 v[2:3], s[0:1], v[0:1] ; GFX950-NEXT: v_mov_b32_e32 v4, 0x7ff80000 -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, s[0:1], v[0:1] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX950-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND @@ -520,85 +520,85 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX7-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX7-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_v2f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX8-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v2f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX900-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v2f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc ; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[6:7] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v8, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] ; GFX10-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4 +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[6:7] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v4, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v5, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_v2f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] ; GFX11-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[6:7] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v4, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v5, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_v2f64: @@ -670,85 +670,85 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1 ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX7-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX7-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_v2f64__nsz: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX8-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX8-NEXT: 
v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v2f64__nsz: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX900-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[6:7] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[6:7] ; GFX900-NEXT: v_mov_b32_e32 v3, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v3, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v9, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v2f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc ; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[6:7] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v8, v5, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f64__nsz: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] ; GFX10-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[6:7] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s4 +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[6:7] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v4, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v5, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_v2f64__nsz: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] ; GFX11-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[6:7] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v4, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, 0x7ff80000, s0 +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, 
v8, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v4, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v5, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_v2f64__nsz: @@ -822,6 +822,7 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX7-NEXT: v_mov_b32_e32 v0, s22 ; GFX7-NEXT: v_mov_b32_e32 v1, s23 ; GFX7-NEXT: v_min_f64 v[2:3], s[18:19], v[0:1] +<<<<<<< HEAD ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1] ; GFX7-NEXT: v_mov_b32_e32 v0, s20 ; GFX7-NEXT: v_mov_b32_e32 v1, s21 @@ -832,6 +833,16 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v1, v5, v6, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5] +======= +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, s[18:19], v[0:1] +; GFX7-NEXT: v_min_f64 v[0:1], s[16:17], v[4:5] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], s[16:17], v[4:5] +; GFX7-NEXT: v_mov_b32_e32 v6, 0x7ff80000 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5] +>>>>>>> 1336afc5defe (update tests) ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use v[0:3] ; GFX7-NEXT: ;;#ASMEND @@ -843,6 +854,7 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX8-NEXT: v_mov_b32_e32 v0, s22 ; GFX8-NEXT: v_mov_b32_e32 v1, s23 ; GFX8-NEXT: v_min_f64 v[2:3], s[18:19], v[0:1] +<<<<<<< HEAD ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1] ; GFX8-NEXT: v_mov_b32_e32 v0, s20 ; GFX8-NEXT: v_mov_b32_e32 v1, s21 @@ -853,6 +865,16 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v1, v5, v6, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5] +======= +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, s[18:19], v[0:1] +; GFX8-NEXT: v_min_f64 v[0:1], s[16:17], v[4:5] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], s[16:17], v[4:5] +; GFX8-NEXT: v_mov_b32_e32 v6, 0x7ff80000 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5] +>>>>>>> 1336afc5defe (update tests) ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use v[0:3] ; GFX8-NEXT: ;;#ASMEND @@ -864,6 +886,7 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX900-NEXT: v_mov_b32_e32 v0, s22 ; GFX900-NEXT: v_mov_b32_e32 v1, s23 ; GFX900-NEXT: v_min_f64 v[2:3], s[18:19], v[0:1] +<<<<<<< HEAD ; GFX900-NEXT: v_cmp_u_f64_e32 vcc, s[18:19], v[0:1] ; GFX900-NEXT: v_mov_b32_e32 v0, s20 ; GFX900-NEXT: v_mov_b32_e32 v1, s21 @@ -874,6 +897,16 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX900-NEXT: v_cndmask_b32_e64 v1, v5, v6, s[4:5] ; GFX900-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5] +======= +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, s[18:19], v[0:1] +; GFX900-NEXT: v_min_f64 v[0:1], s[16:17], v[4:5] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], s[16:17], v[4:5] +; GFX900-NEXT: v_mov_b32_e32 v6, 0x7ff80000 +; GFX900-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5] +>>>>>>> 1336afc5defe (update 
tests) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use v[0:3] ; GFX900-NEXT: ;;#ASMEND @@ -885,15 +918,15 @@ define void @s_minimum_v2f64(<2 x double> inreg %src0, <2 x double> inreg %src1) ; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[18:19] ; GFX950-NEXT: v_min_f64 v[2:3], s[2:3], v[0:1] ; GFX950-NEXT: v_mov_b32_e32 v6, 0x7ff80000 -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[2:3], v[0:1] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, s[2:3], v[0:1] ; GFX950-NEXT: v_mov_b64_e32 v[0:1], s[16:17] ; GFX950-NEXT: v_min_f64 v[4:5], s[0:1], v[0:1] -; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc -; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, s[0:1], v[0:1] +; GFX950-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, s[0:1], v[0:1] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX950-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v[0:3] ; GFX950-NEXT: ;;#ASMEND @@ -955,109 +988,109 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX7-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX7-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_v3f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX8-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX8-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX8-NEXT: 
v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v3f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX900-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX900-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11] -; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v3f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc ; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v12, v13, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v12, v7, vcc ; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[10:11] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v12, v7, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX10-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[8:9] ; GFX10-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11] -; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 
0x7ff80000, s5 +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[10:11] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v6, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v7, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v8, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v9, s5 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_v3f64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX11-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[8:9] ; GFX11-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1 +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[10:11] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v6, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v7, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v8, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v9, s1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_v3f64: @@ -1136,109 +1169,109 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1 ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX7-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX7-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX7-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_v3f64__nsz: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX8-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX8-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], 
v[4:5], v[10:11] ; GFX8-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v3f64__nsz: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX900-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[8:9] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[8:9] ; GFX900-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11] -; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[10:11] +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[10:11] ; GFX900-NEXT: v_mov_b32_e32 v5, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v13, v5, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v5, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v4, v8, 0, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v13, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v5, v7, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v8, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[6:7] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v3f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc ; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v12, v13, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v12, v7, vcc ; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[10:11] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v12, v7, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f64__nsz: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX10-NEXT: 
v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX10-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[8:9] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[8:9] ; GFX10-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11] -; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[10:11] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s5 +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[10:11] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v6, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v7, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v8, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v9, s5 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_v3f64__nsz: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX11-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[8:9] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[8:9] ; GFX11-NEXT: v_min_f64 v[8:9], v[4:5], v[10:11] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[10:11] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v13, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v6, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v7, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, 0x7ff80000, s1 +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[10:11] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v12, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v13, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v6, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v7, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v8, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v9, s1 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_v3f64__nsz: @@ -1317,134 +1350,134 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX7-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX7-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13] -; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX7-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15] -; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX7-NEXT: 
v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_v4f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX8-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX8-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13] -; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX8-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15] -; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v4f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX900-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX900-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13] -; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX900-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15] -; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX900-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 
v5, v7, v11, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v4f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc ; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v16, v17, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[10:11] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v16, v9, vcc ; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[12:13] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v16, v9, vcc ; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[6:7], v[14:15] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v7, v16, v9, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] ; GFX10-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11] ; GFX10-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13] -; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13] +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13] ; GFX10-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15] -; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6 +; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v8, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v9, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v10, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v11, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v12, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v13, s6 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_v4f64: ; GFX11: ; %bb.0: ; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] ; GFX11-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[10:11] ; GFX11-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13] +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[12:13] ; GFX11-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15] -; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2 +; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[6:7], v[14:15] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v8, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v9, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v10, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v11, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v12, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v13, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_v4f64: @@ -1530,134 +1563,134 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1 ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX7-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX7-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13] -; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX7-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15] -; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX7-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX7-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_v4f64__nsz: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX8-NEXT: 
v_min_f64 v[8:9], v[2:3], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX8-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13] -; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX8-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15] -; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX8-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX8-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v4f64__nsz: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX900-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[10:11] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[10:11] ; GFX900-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13] -; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[12:13] +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[12:13] ; GFX900-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15] -; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[14:15] +; GFX900-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[14:15] ; GFX900-NEXT: v_mov_b32_e32 v7, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v7, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v3, v9, v7, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v4, v10, 0, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v5, v11, v7, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v6, v12, 0, s[8:9] -; GFX900-NEXT: v_cndmask_b32_e64 v7, v13, v7, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v17, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v8, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v3, v7, v9, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v10, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v5, v7, v11, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v6, 0, v12, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[8:9] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v4f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc ; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 
v1, v17, v16, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v16, v17, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[10:11] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v16, v9, vcc ; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[12:13] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v16, v9, vcc ; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[6:7], v[14:15] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v7, v16, v9, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f64__nsz: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] ; GFX10-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[10:11] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[10:11] ; GFX10-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13] -; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[12:13] +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[12:13] ; GFX10-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15] -; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[14:15] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v8, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v10, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, 0, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s6 +; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[14:15] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v8, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v9, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v10, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v11, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v12, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v13, s6 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_v4f64__nsz: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] ; GFX11-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[10:11] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[10:11] ; GFX11-NEXT: v_min_f64 v[10:11], v[4:5], v[12:13] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[12:13] +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[12:13] ; GFX11-NEXT: v_min_f64 v[12:13], v[6:7], v[14:15] -; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[14:15] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo -; 
GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v9, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v10, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v11, 0x7ff80000, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, 0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v13, 0x7ff80000, s2 +; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[6:7], v[14:15] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v8, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v9, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v10, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v11, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v12, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v13, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_v4f64__nsz: @@ -1743,14 +1776,24 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 +<<<<<<< HEAD ; GFX7-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17] ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] ; GFX7-NEXT: v_min_f64 v[16:17], v[2:3], v[18:19] ; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19] +======= +; GFX7-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[18:19] +; GFX7-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[20:21] +; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17] +; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[0:1], v[16:17] +>>>>>>> 1336afc5defe (update tests) ; GFX7-NEXT: v_mov_b32_e32 v34, 0x7ff80000 ; GFX7-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21] ; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21] ; GFX7-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23] +<<<<<<< HEAD ; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23] ; GFX7-NEXT: v_min_f64 v[22:23], v[8:9], v[24:25] ; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25] @@ -1777,20 +1820,58 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31] ; GFX7-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc ; GFX7-NEXT: v_cndmask_b32_e32 v15, v17, v34, vcc +======= +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23] +; GFX7-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25] +; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25] +; GFX7-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27] +; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27] +; GFX7-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29] +; GFX7-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29] +; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, v2, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v1, v34, v3, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v32, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v3, v34, v33, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v18, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v34, v19, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v20, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v7, v34, v21, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v16, s[10:11] +; GFX7-NEXT: v_cndmask_b32_e64 v9, v34, v17, s[10:11] +; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v22, s[12:13] +; GFX7-NEXT: v_cndmask_b32_e64 v11, v34, v23, s[12:13] +; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v24, s[14:15] +; GFX7-NEXT: v_cndmask_b32_e64 v13, v34, v25, s[14:15] +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_min_f64 v[18:19], 
v[14:15], v[30:31] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[14:15], v[30:31] +; GFX7-NEXT: v_cndmask_b32_e32 v14, 0, v18, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v15, v34, v19, vcc +>>>>>>> 1336afc5defe (update tests) ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_v8f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 +<<<<<<< HEAD ; GFX8-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17] ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] ; GFX8-NEXT: v_min_f64 v[16:17], v[2:3], v[18:19] ; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19] +======= +; GFX8-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[18:19] +; GFX8-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[20:21] +; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17] +; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[0:1], v[16:17] +>>>>>>> 1336afc5defe (update tests) ; GFX8-NEXT: v_mov_b32_e32 v34, 0x7ff80000 ; GFX8-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21] ; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21] ; GFX8-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23] +<<<<<<< HEAD ; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23] ; GFX8-NEXT: v_min_f64 v[22:23], v[8:9], v[24:25] ; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25] @@ -1817,20 +1898,58 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31] ; GFX8-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v15, v17, v34, vcc +======= +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23] +; GFX8-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25] +; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25] +; GFX8-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27] +; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27] +; GFX8-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29] +; GFX8-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29] +; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, v2, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v34, v3, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v32, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v34, v33, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v18, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v34, v19, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v20, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v7, v34, v21, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v16, s[10:11] +; GFX8-NEXT: v_cndmask_b32_e64 v9, v34, v17, s[10:11] +; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v22, s[12:13] +; GFX8-NEXT: v_cndmask_b32_e64 v11, v34, v23, s[12:13] +; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v24, s[14:15] +; GFX8-NEXT: v_cndmask_b32_e64 v13, v34, v25, s[14:15] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[14:15], v[30:31] +; GFX8-NEXT: v_cndmask_b32_e32 v14, 0, v18, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v15, v34, v19, vcc +>>>>>>> 1336afc5defe (update tests) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v8f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 +<<<<<<< HEAD ; GFX900-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17] ; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] ; GFX900-NEXT: v_min_f64 v[16:17], v[2:3], v[18:19] ; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[18:19] +======= +; GFX900-NEXT: v_min_f64 v[32:33], v[2:3], v[18:19] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[18:19] 
+; GFX900-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[4:5], v[20:21] +; GFX900-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17] +; GFX900-NEXT: v_cmp_o_f64_e64 s[8:9], v[0:1], v[16:17] +>>>>>>> 1336afc5defe (update tests) ; GFX900-NEXT: v_mov_b32_e32 v34, 0x7ff80000 ; GFX900-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21] ; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[20:21] ; GFX900-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23] +<<<<<<< HEAD ; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[22:23] ; GFX900-NEXT: v_min_f64 v[22:23], v[8:9], v[24:25] ; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[24:25] @@ -1857,6 +1976,34 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31] ; GFX900-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc ; GFX900-NEXT: v_cndmask_b32_e32 v15, v17, v34, vcc +======= +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[6:7], v[22:23] +; GFX900-NEXT: v_min_f64 v[16:17], v[8:9], v[24:25] +; GFX900-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[24:25] +; GFX900-NEXT: v_min_f64 v[22:23], v[10:11], v[26:27] +; GFX900-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[26:27] +; GFX900-NEXT: v_min_f64 v[24:25], v[12:13], v[28:29] +; GFX900-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[28:29] +; GFX900-NEXT: v_cndmask_b32_e64 v0, 0, v2, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e64 v1, v34, v3, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e32 v2, 0, v32, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v3, v34, v33, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v18, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v5, v34, v19, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v6, 0, v20, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v7, v34, v21, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v8, 0, v16, s[10:11] +; GFX900-NEXT: v_cndmask_b32_e64 v9, v34, v17, s[10:11] +; GFX900-NEXT: v_cndmask_b32_e64 v10, 0, v22, s[12:13] +; GFX900-NEXT: v_cndmask_b32_e64 v11, v34, v23, s[12:13] +; GFX900-NEXT: v_cndmask_b32_e64 v12, 0, v24, s[14:15] +; GFX900-NEXT: v_cndmask_b32_e64 v13, v34, v25, s[14:15] +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: v_min_f64 v[18:19], v[14:15], v[30:31] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[14:15], v[30:31] +; GFX900-NEXT: v_cndmask_b32_e32 v14, 0, v18, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v15, v34, v19, vcc +>>>>>>> 1336afc5defe (update tests) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v8f64: @@ -1865,42 +2012,42 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000 ; GFX950-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[16:17] ; GFX950-NEXT: v_min_f64 v[34:35], v[2:3], v[18:19] ; GFX950-NEXT: v_min_f64 v[36:37], v[4:5], v[20:21] -; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19] +; GFX950-NEXT: v_cndmask_b32_e32 v0, 0, v32, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v54, v33, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[18:19] ; GFX950-NEXT: v_min_f64 v[38:39], v[6:7], v[22:23] ; GFX950-NEXT: v_min_f64 v[48:49], v[8:9], v[24:25] -; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21] +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v34, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, 
v54, v35, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[4:5], v[20:21] ; GFX950-NEXT: v_min_f64 v[50:51], v[10:11], v[26:27] ; GFX950-NEXT: v_min_f64 v[52:53], v[12:13], v[28:29] -; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23] +; GFX950-NEXT: v_cndmask_b32_e32 v4, 0, v36, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v54, v37, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[6:7], v[22:23] ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31] -; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25] +; GFX950-NEXT: v_cndmask_b32_e32 v6, 0, v38, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v7, v54, v39, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[8:9], v[24:25] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27] +; GFX950-NEXT: v_cndmask_b32_e32 v8, 0, v48, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v9, v54, v49, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[10:11], v[26:27] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29] +; GFX950-NEXT: v_cndmask_b32_e32 v10, 0, v50, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v11, v54, v51, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[12:13], v[28:29] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31] +; GFX950-NEXT: v_cndmask_b32_e32 v12, 0, v52, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v13, v54, v53, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[14:15], v[30:31] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v14, 0, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v15, v54, v17, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v8f64: @@ -1908,38 +2055,38 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17] ; GFX10-NEXT: v_min_f64 v[16:17], v[2:3], v[18:19] -; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[18:19] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[2:3], v[18:19] ; GFX10-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21] -; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[20:21] +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[4:5], v[20:21] ; GFX10-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23] -; GFX10-NEXT: v_cmp_u_f64_e64 s6, v[6:7], v[22:23] +; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[6:7], v[22:23] ; GFX10-NEXT: v_min_f64 v[22:23], v[8:9], v[24:25] -; GFX10-NEXT: v_cmp_u_f64_e64 s7, v[8:9], v[24:25] +; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[8:9], v[24:25] ; GFX10-NEXT: v_min_f64 v[24:25], v[10:11], v[26:27] -; GFX10-NEXT: v_cmp_u_f64_e64 s8, v[10:11], v[26:27] +; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[10:11], v[26:27] ; GFX10-NEXT: v_min_f64 v[26:27], v[12:13], v[28:29] -; GFX10-NEXT: v_cmp_u_f64_e64 s9, v[12:13], v[28:29] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo -; GFX10-NEXT: 
v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v16, 0, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, 0, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v6, v20, 0, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s6 -; GFX10-NEXT: v_cndmask_b32_e64 v8, v22, 0, s7 -; GFX10-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s7 -; GFX10-NEXT: v_cndmask_b32_e64 v10, v24, 0, s8 -; GFX10-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s8 -; GFX10-NEXT: v_cndmask_b32_e64 v12, v26, 0, s9 -; GFX10-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s9 +; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[12:13], v[28:29] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v32, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v33, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v16, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v17, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v18, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v19, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v20, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v21, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v22, s7 +; GFX10-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v23, s7 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v24, s8 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v25, s8 +; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, v26, s9 +; GFX10-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v27, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_min_f64 v[28:29], v[14:15], v[30:31] -; GFX10-NEXT: v_cmp_u_f64_e64 s10, v[14:15], v[30:31] -; GFX10-NEXT: v_cndmask_b32_e64 v14, v28, 0, s10 -; GFX10-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s10 +; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[14:15], v[30:31] +; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, v28, s10 +; GFX10-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v29, s10 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_v8f64: @@ -1947,39 +2094,39 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17] ; GFX11-NEXT: v_min_f64 v[16:17], v[2:3], v[18:19] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[18:19] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[18:19] ; GFX11-NEXT: v_min_f64 v[18:19], v[4:5], v[20:21] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[20:21] +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[20:21] ; GFX11-NEXT: v_min_f64 v[20:21], v[6:7], v[22:23] -; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[22:23] +; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[6:7], v[22:23] ; GFX11-NEXT: v_min_f64 v[22:23], v[8:9], v[24:25] -; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[24:25] +; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[8:9], v[24:25] ; GFX11-NEXT: v_min_f64 v[24:25], v[10:11], v[26:27] -; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[26:27] +; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[10:11], v[26:27] ; GFX11-NEXT: v_min_f64 v[26:27], v[12:13], v[28:29] -; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[28:29] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v33, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v16, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v17, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v18, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v19, 0x7ff80000, s1 -; GFX11-NEXT: 
v_cndmask_b32_e64 v6, v20, 0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v21, 0x7ff80000, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v8, v22, 0, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v9, v23, 0x7ff80000, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v10, v24, 0, s4 -; GFX11-NEXT: v_cndmask_b32_e64 v11, v25, 0x7ff80000, s4 -; GFX11-NEXT: v_cndmask_b32_e64 v12, v26, 0, s5 -; GFX11-NEXT: v_cndmask_b32_e64 v13, v27, 0x7ff80000, s5 +; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[12:13], v[28:29] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v32, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v33, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v16, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v17, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v18, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v19, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v20, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v21, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v22, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v23, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v24, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v25, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v12, 0, v26, s5 +; GFX11-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v27, s5 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_min_f64 v[28:29], v[14:15], v[30:31] -; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[30:31] +; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[14:15], v[30:31] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v14, v28, 0, s6 -; GFX11-NEXT: v_cndmask_b32_e64 v15, v29, 0x7ff80000, s6 +; GFX11-NEXT: v_cndmask_b32_e64 v14, 0, v28, s6 +; GFX11-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v29, s6 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_v8f64: @@ -2011,117 +2158,117 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[31:32] ; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32] ; GFX7-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 -; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32] ; GFX7-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 -; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[6:7] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32] ; GFX7-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40 
-; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[8:9] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32] ; GFX7-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 -; GFX7-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11] +; GFX7-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[10:11] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32] ; GFX7-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 -; GFX7-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13] +; GFX7-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[12:13] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32] ; GFX7-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 -; GFX7-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15] +; GFX7-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[14:15] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32] ; GFX7-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72 -; GFX7-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17] +; GFX7-NEXT: v_cndmask_b32_e64 v14, 0, v14, s[16:17] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32] ; GFX7-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 -; GFX7-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19] +; GFX7-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[18:19] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32] ; GFX7-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 -; GFX7-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21] +; GFX7-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[20:21] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32] ; GFX7-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 -; GFX7-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23] +; GFX7-NEXT: v_cndmask_b32_e64 v20, 0, v20, s[22:23] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32] ; GFX7-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104 -; GFX7-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25] +; GFX7-NEXT: 
v_cndmask_b32_e64 v22, 0, v22, s[24:25] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32] ; GFX7-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 -; GFX7-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27] +; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, v24, s[26:27] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32] ; GFX7-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 -; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] +; GFX7-NEXT: v_cndmask_b32_e64 v26, 0, v26, s[28:29] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] +; GFX7-NEXT: v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX7-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] +; GFX7-NEXT: v_cndmask_b32_e64 v28, 0, v28, s[40:41] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] +; GFX7-NEXT: v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX7-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33] ; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9] -; GFX7-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11] -; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13] -; GFX7-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15] -; GFX7-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17] -; GFX7-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19] -; GFX7-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21] -; GFX7-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23] -; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] -; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] -; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] -; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] -; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v32, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v3, v32, v3, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v5, v32, v5, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v7, v32, v7, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v9, v32, v9, s[10:11] +; GFX7-NEXT: v_cndmask_b32_e64 v11, v32, v11, s[12:13] +; GFX7-NEXT: v_cndmask_b32_e64 v13, v32, v13, s[14:15] +; GFX7-NEXT: v_cndmask_b32_e64 v15, v32, v15, s[16:17] +; GFX7-NEXT: v_cndmask_b32_e64 v17, v32, v17, s[18:19] +; GFX7-NEXT: v_cndmask_b32_e64 v19, v32, v19, s[20:21] +; GFX7-NEXT: v_cndmask_b32_e64 v21, v32, v21, s[22:23] +; GFX7-NEXT: v_cndmask_b32_e64 v23, v32, v23, s[24:25] +; GFX7-NEXT: v_cndmask_b32_e64 v25, v32, v25, s[26:27] +; GFX7-NEXT: v_cndmask_b32_e64 v27, v32, v27, s[28:29] +; GFX7-NEXT: v_cndmask_b32_e64 v29, v32, v29, s[40:41] +; GFX7-NEXT: v_cndmask_b32_e64 v31, v32, v31, s[42:43] +; GFX7-NEXT: v_cndmask_b32_e64 v30, 0, v30, s[42:43] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX8-LABEL: v_minimum_v16f64: @@ -2130,117 +2277,117 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[31:32] ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32] ; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 -; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32] ; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[6:7] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32] ; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40 -; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[8:9] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32] ; GFX8-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 -; GFX8-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11] +; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[10:11] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32] ; GFX8-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 -; GFX8-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13] +; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[12:13] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32] ; GFX8-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 -; GFX8-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15] +; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[14:15] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32] ; GFX8-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72 -; GFX8-NEXT: 
v_cndmask_b32_e64 v14, v14, 0, s[16:17] +; GFX8-NEXT: v_cndmask_b32_e64 v14, 0, v14, s[16:17] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32] ; GFX8-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 -; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19] +; GFX8-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[18:19] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32] ; GFX8-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 -; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21] +; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[20:21] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32] ; GFX8-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 -; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23] +; GFX8-NEXT: v_cndmask_b32_e64 v20, 0, v20, s[22:23] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32] ; GFX8-NEXT: v_min_f64 v[22:23], v[22:23], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104 -; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25] +; GFX8-NEXT: v_cndmask_b32_e64 v22, 0, v22, s[24:25] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32] ; GFX8-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 -; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27] +; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, v24, s[26:27] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32] ; GFX8-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 -; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] +; GFX8-NEXT: v_cndmask_b32_e64 v26, 0, v26, s[28:29] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] +; GFX8-NEXT: v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX8-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v28, 0, v28, s[40:41] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] +; GFX8-NEXT: v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX8-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33] ; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5] 
-; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9] -; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11] -; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13] -; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15] -; GFX8-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17] -; GFX8-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19] -; GFX8-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21] -; GFX8-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23] -; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] -; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] -; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] -; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] -; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v32, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v3, v32, v3, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v32, v5, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v7, v32, v7, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v9, v32, v9, s[10:11] +; GFX8-NEXT: v_cndmask_b32_e64 v11, v32, v11, s[12:13] +; GFX8-NEXT: v_cndmask_b32_e64 v13, v32, v13, s[14:15] +; GFX8-NEXT: v_cndmask_b32_e64 v15, v32, v15, s[16:17] +; GFX8-NEXT: v_cndmask_b32_e64 v17, v32, v17, s[18:19] +; GFX8-NEXT: v_cndmask_b32_e64 v19, v32, v19, s[20:21] +; GFX8-NEXT: v_cndmask_b32_e64 v21, v32, v21, s[22:23] +; GFX8-NEXT: v_cndmask_b32_e64 v23, v32, v23, s[24:25] +; GFX8-NEXT: v_cndmask_b32_e64 v25, v32, v25, s[26:27] +; GFX8-NEXT: v_cndmask_b32_e64 v27, v32, v27, s[28:29] +; GFX8-NEXT: v_cndmask_b32_e64 v29, v32, v29, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v31, v32, v31, s[42:43] +; GFX8-NEXT: v_cndmask_b32_e64 v30, 0, v30, s[42:43] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v16f64: @@ -2249,117 +2396,117 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[31:32] ; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:12 -; GFX900-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX900-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[4:5], v[2:3], v[31:32] ; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:20 -; GFX900-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[6:7], v[4:5], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[6:7], v[4:5], v[31:32] ; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 -; GFX900-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[6:7] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[8:9], v[6:7], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[8:9], v[6:7], v[31:32] ; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[31:32] ; 
GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:36 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40 -; GFX900-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[8:9] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[10:11], v[8:9], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[10:11], v[8:9], v[31:32] ; GFX900-NEXT: v_min_f64 v[8:9], v[8:9], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:44 -; GFX900-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[10:11] +; GFX900-NEXT: v_cndmask_b32_e64 v8, 0, v8, s[10:11] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[12:13], v[10:11], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[12:13], v[10:11], v[31:32] ; GFX900-NEXT: v_min_f64 v[10:11], v[10:11], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 -; GFX900-NEXT: v_cndmask_b32_e64 v10, v10, 0, s[12:13] +; GFX900-NEXT: v_cndmask_b32_e64 v10, 0, v10, s[12:13] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[14:15], v[12:13], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[14:15], v[12:13], v[31:32] ; GFX900-NEXT: v_min_f64 v[12:13], v[12:13], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 -; GFX900-NEXT: v_cndmask_b32_e64 v12, v12, 0, s[14:15] +; GFX900-NEXT: v_cndmask_b32_e64 v12, 0, v12, s[14:15] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[16:17], v[14:15], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[16:17], v[14:15], v[31:32] ; GFX900-NEXT: v_min_f64 v[14:15], v[14:15], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72 -; GFX900-NEXT: v_cndmask_b32_e64 v14, v14, 0, s[16:17] +; GFX900-NEXT: v_cndmask_b32_e64 v14, 0, v14, s[16:17] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[18:19], v[16:17], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[18:19], v[16:17], v[31:32] ; GFX900-NEXT: v_min_f64 v[16:17], v[16:17], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 -; GFX900-NEXT: v_cndmask_b32_e64 v16, v16, 0, s[18:19] +; GFX900-NEXT: v_cndmask_b32_e64 v16, 0, v16, s[18:19] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[20:21], v[18:19], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[20:21], v[18:19], v[31:32] ; GFX900-NEXT: v_min_f64 v[18:19], v[18:19], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:84 -; GFX900-NEXT: v_cndmask_b32_e64 v18, v18, 0, s[20:21] +; GFX900-NEXT: v_cndmask_b32_e64 v18, 0, v18, s[20:21] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[22:23], v[20:21], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[22:23], v[20:21], v[31:32] ; GFX900-NEXT: v_min_f64 v[20:21], v[20:21], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:92 -; GFX900-NEXT: v_cndmask_b32_e64 v20, v20, 0, s[22:23] +; GFX900-NEXT: v_cndmask_b32_e64 v20, 0, v20, s[22:23] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[24:25], v[22:23], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[24:25], v[22:23], v[31:32] ; GFX900-NEXT: 
v_min_f64 v[22:23], v[22:23], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:100 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104 -; GFX900-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[24:25] +; GFX900-NEXT: v_cndmask_b32_e64 v22, 0, v22, s[24:25] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[26:27], v[24:25], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[26:27], v[24:25], v[31:32] ; GFX900-NEXT: v_min_f64 v[24:25], v[24:25], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:108 -; GFX900-NEXT: v_cndmask_b32_e64 v24, v24, 0, s[26:27] +; GFX900-NEXT: v_cndmask_b32_e64 v24, 0, v24, s[26:27] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[28:29], v[26:27], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[28:29], v[26:27], v[31:32] ; GFX900-NEXT: v_min_f64 v[26:27], v[26:27], v[31:32] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 -; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] +; GFX900-NEXT: v_cndmask_b32_e64 v26, 0, v26, s[28:29] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] +; GFX900-NEXT: v_cmp_o_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX900-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] +; GFX900-NEXT: v_cndmask_b32_e64 v28, 0, v28, s[40:41] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] +; GFX900-NEXT: v_cmp_o_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX900-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33] ; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000 -; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v3, v3, v32, s[4:5] -; GFX900-NEXT: v_cndmask_b32_e64 v5, v5, v32, s[6:7] -; GFX900-NEXT: v_cndmask_b32_e64 v7, v7, v32, s[8:9] -; GFX900-NEXT: v_cndmask_b32_e64 v9, v9, v32, s[10:11] -; GFX900-NEXT: v_cndmask_b32_e64 v11, v11, v32, s[12:13] -; GFX900-NEXT: v_cndmask_b32_e64 v13, v13, v32, s[14:15] -; GFX900-NEXT: v_cndmask_b32_e64 v15, v15, v32, s[16:17] -; GFX900-NEXT: v_cndmask_b32_e64 v17, v17, v32, s[18:19] -; GFX900-NEXT: v_cndmask_b32_e64 v19, v19, v32, s[20:21] -; GFX900-NEXT: v_cndmask_b32_e64 v21, v21, v32, s[22:23] -; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] -; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] -; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] -; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] -; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] +; GFX900-NEXT: v_cndmask_b32_e32 v1, v32, v1, vcc +; GFX900-NEXT: v_cndmask_b32_e64 v3, v32, v3, s[4:5] +; GFX900-NEXT: v_cndmask_b32_e64 v5, v32, v5, s[6:7] +; GFX900-NEXT: v_cndmask_b32_e64 v7, v32, v7, s[8:9] +; GFX900-NEXT: v_cndmask_b32_e64 v9, v32, v9, s[10:11] +; GFX900-NEXT: v_cndmask_b32_e64 v11, v32, v11, s[12:13] +; GFX900-NEXT: v_cndmask_b32_e64 v13, v32, v13, s[14:15] +; GFX900-NEXT: v_cndmask_b32_e64 v15, v32, v15, s[16:17] +; GFX900-NEXT: v_cndmask_b32_e64 v17, v32, v17, s[18:19] +; GFX900-NEXT: v_cndmask_b32_e64 v19, v32, v19, s[20:21] +; GFX900-NEXT: v_cndmask_b32_e64 v21, v32, v21, s[22:23] +; GFX900-NEXT: 
v_cndmask_b32_e64 v23, v32, v23, s[24:25] +; GFX900-NEXT: v_cndmask_b32_e64 v25, v32, v25, s[26:27] +; GFX900-NEXT: v_cndmask_b32_e64 v27, v32, v27, s[28:29] +; GFX900-NEXT: v_cndmask_b32_e64 v29, v32, v29, s[40:41] +; GFX900-NEXT: v_cndmask_b32_e64 v31, v32, v31, s[42:43] +; GFX900-NEXT: v_cndmask_b32_e64 v30, 0, v30, s[42:43] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v16f64: @@ -2409,6 +2556,7 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: s_waitcnt vmcnt(25) +<<<<<<< HEAD ; GFX950-NEXT: v_min_f64 v[58:59], v[0:1], v[32:33] ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33] ; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:112 @@ -2517,6 +2665,115 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse +======= +; GFX950-NEXT: v_min_f64 v[58:59], v[2:3], v[36:37] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[2:3], v[36:37] +; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:112 +; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:108 +; GFX950-NEXT: s_waitcnt vmcnt(25) +; GFX950-NEXT: v_min_f64 v[60:61], v[4:5], v[38:39] +; GFX950-NEXT: v_cmp_o_f64_e64 s[0:1], v[4:5], v[38:39] +; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:120 +; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:116 +; GFX950-NEXT: s_waitcnt vmcnt(25) +; GFX950-NEXT: v_min_f64 v[62:63], v[6:7], v[48:49] +; GFX950-NEXT: v_cmp_o_f64_e64 s[2:3], v[6:7], v[48:49] +; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:128 +; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:124 +; GFX950-NEXT: s_waitcnt vmcnt(25) +; GFX950-NEXT: v_min_f64 v[2:3], v[0:1], v[56:57] +; GFX950-NEXT: v_cmp_o_f64_e64 s[4:5], v[0:1], v[56:57] +; GFX950-NEXT: v_mov_b32_e32 v0, 0x7ff80000 +; GFX950-NEXT: s_waitcnt vmcnt(23) +; GFX950-NEXT: v_min_f64 v[56:57], v[8:9], v[46:47] +; GFX950-NEXT: v_cndmask_b32_e64 v1, 0, v2, s[4:5] +; GFX950-NEXT: v_accvgpr_write_b32 a0, v1 +; GFX950-NEXT: v_cndmask_b32_e64 v1, v0, v3, s[4:5] +; GFX950-NEXT: v_cndmask_b32_e32 v2, 0, v58, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v0, v59, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[8:9], v[46:47] +; GFX950-NEXT: s_waitcnt vmcnt(21) +; GFX950-NEXT: v_min_f64 v[46:47], v[10:11], v[44:45] +; GFX950-NEXT: v_cndmask_b32_e64 v4, 0, v60, s[0:1] +; GFX950-NEXT: v_cndmask_b32_e32 v8, 0, v56, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v9, v0, v57, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[10:11], v[44:45] +; GFX950-NEXT: s_waitcnt vmcnt(19) +; GFX950-NEXT: v_min_f64 v[44:45], v[12:13], v[42:43] +; GFX950-NEXT: v_cndmask_b32_e64 v5, v0, v61, s[0:1] +; GFX950-NEXT: v_cndmask_b32_e32 v10, 0, v46, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v11, v0, v47, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[12:13], v[42:43] +; GFX950-NEXT: s_waitcnt vmcnt(17) +; GFX950-NEXT: v_min_f64 v[42:43], v[14:15], v[40:41] +; GFX950-NEXT: v_cndmask_b32_e64 v6, 0, v62, s[2:3] +; GFX950-NEXT: v_cndmask_b32_e32 v12, 0, v44, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v13, v0, v45, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[14:15], v[40:41] +; GFX950-NEXT: s_waitcnt vmcnt(15) +; GFX950-NEXT: v_min_f64 v[40:41], v[16:17], v[54:55] +; GFX950-NEXT: v_cndmask_b32_e64 v7, v0, v63, s[2:3] +; GFX950-NEXT: v_cndmask_b32_e32 
v14, 0, v42, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v15, v0, v43, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[16:17], v[54:55] +; GFX950-NEXT: s_waitcnt vmcnt(13) +; GFX950-NEXT: v_min_f64 v[54:55], v[18:19], v[52:53] +; GFX950-NEXT: v_accvgpr_read_b32 v63, a16 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v16, 0, v40, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v17, v0, v41, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[18:19], v[52:53] +; GFX950-NEXT: s_waitcnt vmcnt(11) +; GFX950-NEXT: v_min_f64 v[52:53], v[20:21], v[50:51] +; GFX950-NEXT: v_accvgpr_read_b32 v62, a15 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v18, 0, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v19, v0, v55, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[20:21], v[50:51] +; GFX950-NEXT: s_waitcnt vmcnt(9) +; GFX950-NEXT: v_min_f64 v[50:51], v[22:23], v[34:35] +; GFX950-NEXT: v_accvgpr_read_b32 v61, a14 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v20, 0, v52, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v21, v0, v53, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[22:23], v[34:35] +; GFX950-NEXT: s_waitcnt vmcnt(6) +; GFX950-NEXT: v_min_f64 v[34:35], v[24:25], v[32:33] +; GFX950-NEXT: v_accvgpr_read_b32 v60, a13 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v22, 0, v50, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v23, v0, v51, vcc +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[24:25], v[32:33] +; GFX950-NEXT: v_accvgpr_read_b32 v59, a12 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v58, a11 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v24, 0, v34, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v25, v0, v35, vcc +; GFX950-NEXT: v_accvgpr_read_b32 v57, a10 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v56, a9 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v47, a8 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v46, a7 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v45, a6 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v44, a5 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v43, a4 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v42, a3 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v41, a2 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v40, a1 ; Reload Reuse +; GFX950-NEXT: s_waitcnt vmcnt(4) +; GFX950-NEXT: v_min_f64 v[32:33], v[26:27], v[36:37] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[26:27], v[36:37] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v26, 0, v32, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v27, v0, v33, vcc +; GFX950-NEXT: s_waitcnt vmcnt(2) +; GFX950-NEXT: v_min_f64 v[32:33], v[28:29], v[38:39] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[28:29], v[38:39] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v28, 0, v32, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v29, v0, v33, vcc +; GFX950-NEXT: s_waitcnt vmcnt(0) +; GFX950-NEXT: v_min_f64 v[32:33], v[30:31], v[48:49] +; GFX950-NEXT: v_cmp_o_f64_e32 vcc, v[30:31], v[48:49] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v30, 0, v32, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v31, v0, v33, vcc +; GFX950-NEXT: v_accvgpr_read_b32 v0, a0 +>>>>>>> 1336afc5defe (update tests) ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v16f64: @@ -2550,17 +2807,26 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX10-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:72 ; GFX10-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:104 ; GFX10-NEXT: s_waitcnt vmcnt(24) +<<<<<<< HEAD ; GFX10-NEXT: v_min_f64 v[82:83], v[0:1], v[31:32] ; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[31:32] ; GFX10-NEXT: s_waitcnt vmcnt(22) ; 
GFX10-NEXT: v_min_f64 v[84:85], v[2:3], v[33:34] ; GFX10-NEXT: v_cmp_u_f64_e64 s4, v[2:3], v[33:34] +======= +; GFX10-NEXT: v_min_f64 v[82:83], v[2:3], v[31:32] +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[2:3], v[31:32] +; GFX10-NEXT: s_waitcnt vmcnt(22) +; GFX10-NEXT: v_min_f64 v[84:85], v[4:5], v[33:34] +; GFX10-NEXT: v_cmp_o_f64_e64 s4, v[4:5], v[33:34] +>>>>>>> 1336afc5defe (update tests) ; GFX10-NEXT: s_clause 0x3 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108 ; GFX10-NEXT: s_waitcnt vmcnt(24) +<<<<<<< HEAD ; GFX10-NEXT: v_min_f64 v[32:33], v[4:5], v[35:36] ; GFX10-NEXT: v_cmp_u_f64_e64 s5, v[4:5], v[35:36] ; GFX10-NEXT: s_clause 0x2 @@ -2585,9 +2851,36 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX10-NEXT: s_waitcnt vmcnt(11) ; GFX10-NEXT: v_min_f64 v[64:65], v[20:21], v[70:71] ; GFX10-NEXT: v_cmp_u_f64_e64 s13, v[20:21], v[70:71] +======= +; GFX10-NEXT: v_min_f64 v[32:33], v[6:7], v[35:36] +; GFX10-NEXT: v_cmp_o_f64_e64 s5, v[6:7], v[35:36] +; GFX10-NEXT: s_clause 0x2 +; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:128 +; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:124 +; GFX10-NEXT: s_waitcnt vmcnt(23) +; GFX10-NEXT: v_cmp_o_f64_e64 s10, v[14:15], v[50:51] +; GFX10-NEXT: s_waitcnt vmcnt(21) +; GFX10-NEXT: v_cmp_o_f64_e64 s9, v[12:13], v[52:53] +; GFX10-NEXT: s_waitcnt vmcnt(19) +; GFX10-NEXT: v_cmp_o_f64_e64 s7, v[10:11], v[54:55] +; GFX10-NEXT: s_waitcnt vmcnt(18) +; GFX10-NEXT: v_min_f64 v[34:35], v[8:9], v[37:38] +; GFX10-NEXT: v_cmp_o_f64_e64 s6, v[8:9], v[37:38] +; GFX10-NEXT: s_waitcnt vmcnt(16) +; GFX10-NEXT: v_min_f64 v[8:9], v[0:1], v[64:65] +; GFX10-NEXT: v_min_f64 v[36:37], v[10:11], v[54:55] +; GFX10-NEXT: v_cmp_o_f64_e64 s8, v[0:1], v[64:65] +; GFX10-NEXT: v_min_f64 v[38:39], v[12:13], v[52:53] +; GFX10-NEXT: v_min_f64 v[52:53], v[14:15], v[50:51] +; GFX10-NEXT: s_waitcnt vmcnt(11) +; GFX10-NEXT: v_min_f64 v[54:55], v[20:21], v[70:71] +; GFX10-NEXT: v_cmp_o_f64_e64 s13, v[20:21], v[70:71] +>>>>>>> 1336afc5defe (update tests) ; GFX10-NEXT: s_waitcnt vmcnt(9) -; GFX10-NEXT: v_cmp_u_f64_e64 s12, v[18:19], v[80:81] +; GFX10-NEXT: v_cmp_o_f64_e64 s12, v[18:19], v[80:81] ; GFX10-NEXT: s_waitcnt vmcnt(8) +<<<<<<< HEAD ; GFX10-NEXT: v_min_f64 v[52:53], v[16:17], v[50:51] ; GFX10-NEXT: v_cmp_u_f64_e64 s11, v[16:17], v[50:51] ; GFX10-NEXT: v_min_f64 v[50:51], v[18:19], v[80:81] @@ -2637,6 +2930,57 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX10-NEXT: v_cndmask_b32_e64 v27, v67, 0x7ff80000, s16 ; GFX10-NEXT: v_cndmask_b32_e64 v30, v86, 0, s18 ; GFX10-NEXT: v_cndmask_b32_e64 v31, v87, 0x7ff80000, s18 +======= +; GFX10-NEXT: v_min_f64 v[50:51], v[16:17], v[48:49] +; GFX10-NEXT: v_cmp_o_f64_e64 s11, v[16:17], v[48:49] +; GFX10-NEXT: v_min_f64 v[48:49], v[18:19], v[80:81] +; GFX10-NEXT: v_min_f64 v[64:65], v[22:23], v[68:69] +; GFX10-NEXT: v_cmp_o_f64_e64 s14, v[22:23], v[68:69] +; GFX10-NEXT: s_waitcnt vmcnt(7) +; GFX10-NEXT: v_min_f64 v[68:69], v[24:25], v[66:67] +; GFX10-NEXT: v_cmp_o_f64_e64 s15, v[24:25], v[66:67] +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v36, s7 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, v8, s8 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x7ff80000, v9, s8 +; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v34, s6 
+; GFX10-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v35, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v37, s7 +; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, v38, s9 +; GFX10-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v39, s9 +; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, v52, s10 +; GFX10-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v53, s10 +; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, v50, s11 +; GFX10-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v51, s11 +; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, v48, s12 +; GFX10-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v49, s12 +; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, v54, s13 +; GFX10-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v55, s13 +; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, v64, s14 +; GFX10-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v65, s14 +; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, v68, s15 +; GFX10-NEXT: v_cndmask_b32_e64 v25, 0x7ff80000, v69, s15 +; GFX10-NEXT: s_waitcnt vmcnt(5) +; GFX10-NEXT: v_min_f64 v[70:71], v[28:29], v[2:3] +; GFX10-NEXT: v_cmp_o_f64_e64 s17, v[28:29], v[2:3] +; GFX10-NEXT: s_waitcnt vmcnt(3) +; GFX10-NEXT: v_min_f64 v[66:67], v[26:27], v[4:5] +; GFX10-NEXT: v_cmp_o_f64_e64 s16, v[26:27], v[4:5] +; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v82, vcc_lo +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_min_f64 v[80:81], v[30:31], v[6:7] +; GFX10-NEXT: v_cmp_o_f64_e64 s18, v[30:31], v[6:7] +; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7ff80000, v83, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, v84, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v85, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v32, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v33, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v28, 0, v70, s17 +; GFX10-NEXT: v_cndmask_b32_e64 v29, 0x7ff80000, v71, s17 +; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, v66, s16 +; GFX10-NEXT: v_cndmask_b32_e64 v27, 0x7ff80000, v67, s16 +; GFX10-NEXT: v_cndmask_b32_e64 v30, 0, v80, s18 +; GFX10-NEXT: v_cndmask_b32_e64 v31, 0x7ff80000, v81, s18 +>>>>>>> 1336afc5defe (update tests) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_minimum_v16f64: @@ -2678,84 +3022,84 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:124 ; GFX11-NEXT: s_waitcnt vmcnt(30) ; GFX11-NEXT: v_min_f64 v[96:97], v[0:1], v[32:33] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[32:33] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[32:33] ; GFX11-NEXT: s_waitcnt vmcnt(28) ; GFX11-NEXT: v_min_f64 v[32:33], v[2:3], v[34:35] -; GFX11-NEXT: v_cmp_u_f64_e64 s0, v[2:3], v[34:35] +; GFX11-NEXT: v_cmp_o_f64_e64 s0, v[2:3], v[34:35] ; GFX11-NEXT: s_waitcnt vmcnt(26) ; GFX11-NEXT: v_min_f64 v[34:35], v[4:5], v[36:37] -; GFX11-NEXT: v_cmp_u_f64_e64 s1, v[4:5], v[36:37] +; GFX11-NEXT: v_cmp_o_f64_e64 s1, v[4:5], v[36:37] ; GFX11-NEXT: s_waitcnt vmcnt(24) ; GFX11-NEXT: v_min_f64 v[36:37], v[6:7], v[38:39] -; GFX11-NEXT: v_cmp_u_f64_e64 s2, v[6:7], v[38:39] +; GFX11-NEXT: v_cmp_o_f64_e64 s2, v[6:7], v[38:39] ; GFX11-NEXT: s_waitcnt vmcnt(22) ; GFX11-NEXT: v_min_f64 v[38:39], v[8:9], v[48:49] -; GFX11-NEXT: v_cmp_u_f64_e64 s3, v[8:9], v[48:49] +; GFX11-NEXT: v_cmp_o_f64_e64 s3, v[8:9], v[48:49] ; GFX11-NEXT: s_waitcnt vmcnt(20) ; GFX11-NEXT: v_min_f64 v[48:49], v[10:11], v[50:51] -; GFX11-NEXT: v_cmp_u_f64_e64 s4, v[10:11], v[50:51] +; GFX11-NEXT: v_cmp_o_f64_e64 s4, v[10:11], v[50:51] ; GFX11-NEXT: s_waitcnt vmcnt(18) ; GFX11-NEXT: v_min_f64 v[50:51], v[12:13], v[52:53] -; GFX11-NEXT: v_cmp_u_f64_e64 s5, v[12:13], v[52:53] +; GFX11-NEXT: v_cmp_o_f64_e64 s5, v[12:13], v[52:53] 
; GFX11-NEXT: s_waitcnt vmcnt(16) ; GFX11-NEXT: v_min_f64 v[52:53], v[14:15], v[54:55] -; GFX11-NEXT: v_cmp_u_f64_e64 s6, v[14:15], v[54:55] +; GFX11-NEXT: v_cmp_o_f64_e64 s6, v[14:15], v[54:55] ; GFX11-NEXT: s_waitcnt vmcnt(14) ; GFX11-NEXT: v_min_f64 v[54:55], v[16:17], v[64:65] -; GFX11-NEXT: v_cmp_u_f64_e64 s7, v[16:17], v[64:65] +; GFX11-NEXT: v_cmp_o_f64_e64 s7, v[16:17], v[64:65] ; GFX11-NEXT: s_waitcnt vmcnt(12) ; GFX11-NEXT: v_min_f64 v[64:65], v[18:19], v[66:67] -; GFX11-NEXT: v_cmp_u_f64_e64 s8, v[18:19], v[66:67] +; GFX11-NEXT: v_cmp_o_f64_e64 s8, v[18:19], v[66:67] ; GFX11-NEXT: s_waitcnt vmcnt(10) ; GFX11-NEXT: v_min_f64 v[66:67], v[20:21], v[68:69] -; GFX11-NEXT: v_cmp_u_f64_e64 s9, v[20:21], v[68:69] +; GFX11-NEXT: v_cmp_o_f64_e64 s9, v[20:21], v[68:69] ; GFX11-NEXT: s_waitcnt vmcnt(8) ; GFX11-NEXT: v_min_f64 v[68:69], v[22:23], v[70:71] -; GFX11-NEXT: v_cmp_u_f64_e64 s10, v[22:23], v[70:71] +; GFX11-NEXT: v_cmp_o_f64_e64 s10, v[22:23], v[70:71] ; GFX11-NEXT: s_waitcnt vmcnt(6) ; GFX11-NEXT: v_min_f64 v[70:71], v[24:25], v[80:81] -; GFX11-NEXT: v_cmp_u_f64_e64 s11, v[24:25], v[80:81] +; GFX11-NEXT: v_cmp_o_f64_e64 s11, v[24:25], v[80:81] ; GFX11-NEXT: s_waitcnt vmcnt(4) ; GFX11-NEXT: v_min_f64 v[80:81], v[26:27], v[82:83] -; GFX11-NEXT: v_cmp_u_f64_e64 s12, v[26:27], v[82:83] +; GFX11-NEXT: v_cmp_o_f64_e64 s12, v[26:27], v[82:83] ; GFX11-NEXT: s_waitcnt vmcnt(2) ; GFX11-NEXT: v_min_f64 v[82:83], v[28:29], v[84:85] -; GFX11-NEXT: v_cmp_u_f64_e64 s13, v[28:29], v[84:85] +; GFX11-NEXT: v_cmp_o_f64_e64 s13, v[28:29], v[84:85] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_min_f64 v[84:85], v[30:31], v[86:87] -; GFX11-NEXT: v_cmp_u_f64_e64 s14, v[30:31], v[86:87] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v96, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v97, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v2, v32, 0, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, v33, 0x7ff80000, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v4, v34, 0, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v5, v35, 0x7ff80000, s1 -; GFX11-NEXT: v_cndmask_b32_e64 v6, v36, 0, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v7, v37, 0x7ff80000, s2 -; GFX11-NEXT: v_cndmask_b32_e64 v8, v38, 0, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v9, v39, 0x7ff80000, s3 -; GFX11-NEXT: v_cndmask_b32_e64 v10, v48, 0, s4 -; GFX11-NEXT: v_cndmask_b32_e64 v11, v49, 0x7ff80000, s4 -; GFX11-NEXT: v_cndmask_b32_e64 v12, v50, 0, s5 -; GFX11-NEXT: v_cndmask_b32_e64 v13, v51, 0x7ff80000, s5 -; GFX11-NEXT: v_cndmask_b32_e64 v14, v52, 0, s6 -; GFX11-NEXT: v_cndmask_b32_e64 v15, v53, 0x7ff80000, s6 -; GFX11-NEXT: v_cndmask_b32_e64 v16, v54, 0, s7 -; GFX11-NEXT: v_cndmask_b32_e64 v17, v55, 0x7ff80000, s7 -; GFX11-NEXT: v_cndmask_b32_e64 v18, v64, 0, s8 -; GFX11-NEXT: v_cndmask_b32_e64 v19, v65, 0x7ff80000, s8 -; GFX11-NEXT: v_cndmask_b32_e64 v20, v66, 0, s9 -; GFX11-NEXT: v_cndmask_b32_e64 v21, v67, 0x7ff80000, s9 -; GFX11-NEXT: v_cndmask_b32_e64 v22, v68, 0, s10 -; GFX11-NEXT: v_cndmask_b32_e64 v23, v69, 0x7ff80000, s10 -; GFX11-NEXT: v_cndmask_b32_e64 v24, v70, 0, s11 -; GFX11-NEXT: v_cndmask_b32_e64 v25, v71, 0x7ff80000, s11 -; GFX11-NEXT: v_cndmask_b32_e64 v26, v80, 0, s12 -; GFX11-NEXT: v_cndmask_b32_e64 v27, v81, 0x7ff80000, s12 -; GFX11-NEXT: v_cndmask_b32_e64 v28, v82, 0, s13 -; GFX11-NEXT: v_cndmask_b32_e64 v29, v83, 0x7ff80000, s13 -; GFX11-NEXT: v_cndmask_b32_e64 v30, v84, 0, s14 -; GFX11-NEXT: v_cndmask_b32_e64 v31, v85, 0x7ff80000, s14 +; GFX11-NEXT: v_cmp_o_f64_e64 s14, v[30:31], v[86:87] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v96, vcc_lo +; GFX11-NEXT: 
v_cndmask_b32_e32 v1, 0x7ff80000, v97, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v32, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0x7ff80000, v33, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, v34, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v35, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v36, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v7, 0x7ff80000, v37, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v38, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v9, 0x7ff80000, v39, s3 +; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v48, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v11, 0x7ff80000, v49, s4 +; GFX11-NEXT: v_cndmask_b32_e64 v12, 0, v50, s5 +; GFX11-NEXT: v_cndmask_b32_e64 v13, 0x7ff80000, v51, s5 +; GFX11-NEXT: v_cndmask_b32_e64 v14, 0, v52, s6 +; GFX11-NEXT: v_cndmask_b32_e64 v15, 0x7ff80000, v53, s6 +; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, v54, s7 +; GFX11-NEXT: v_cndmask_b32_e64 v17, 0x7ff80000, v55, s7 +; GFX11-NEXT: v_cndmask_b32_e64 v18, 0, v64, s8 +; GFX11-NEXT: v_cndmask_b32_e64 v19, 0x7ff80000, v65, s8 +; GFX11-NEXT: v_cndmask_b32_e64 v20, 0, v66, s9 +; GFX11-NEXT: v_cndmask_b32_e64 v21, 0x7ff80000, v67, s9 +; GFX11-NEXT: v_cndmask_b32_e64 v22, 0, v68, s10 +; GFX11-NEXT: v_cndmask_b32_e64 v23, 0x7ff80000, v69, s10 +; GFX11-NEXT: v_cndmask_b32_e64 v24, 0, v70, s11 +; GFX11-NEXT: v_cndmask_b32_e64 v25, 0x7ff80000, v71, s11 +; GFX11-NEXT: v_cndmask_b32_e64 v26, 0, v80, s12 +; GFX11-NEXT: v_cndmask_b32_e64 v27, 0x7ff80000, v81, s12 +; GFX11-NEXT: v_cndmask_b32_e64 v28, 0, v82, s13 +; GFX11-NEXT: v_cndmask_b32_e64 v29, 0x7ff80000, v83, s13 +; GFX11-NEXT: v_cndmask_b32_e64 v30, 0, v84, s14 +; GFX11-NEXT: v_cndmask_b32_e64 v31, 0x7ff80000, v85, s14 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_minimum_v16f64: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll index af914bd4043cf..437b4f686271f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -88,9 +88,9 @@ define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) % ; SI-NEXT: v_not_b32_e32 v4, v4 ; SI-NEXT: v_and_b32_e32 v5, v3, v5 ; SI-NEXT: v_and_b32_e32 v4, v2, v4 -; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v6 -; SI-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; SI-NEXT: v_cmp_le_i32_e32 vcc, 0, v6 +; SI-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc +; SI-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc ; SI-NEXT: v_cmp_lt_i32_e32 vcc, 51, v6 ; SI-NEXT: v_cndmask_b32_e32 v5, v5, v3, vcc ; SI-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll index 24a4d8fbde200..6deabc8ec5b28 100644 --- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll @@ -123,10 +123,10 @@ define i32 @cmpxchg_private_i32(ptr addrspace(5) %ptr) { ; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GCN-NEXT: v_cndmask_b32_e64 v2, v1, 1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GCN-NEXT: v_cndmask_b32_e32 v2, 1, v1, vcc ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen -; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 1, 0, vcc ; GCN-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GCN-NEXT: s_waitcnt expcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, v1 @@ -163,12 +163,12 @@ define i64 @cmpxchg_private_i64(ptr addrspace(5) %ptr) { ; GCN-NEXT: s_mov_b32 s7, 0xf000 ; 
GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GCN-NEXT: v_cndmask_b32_e64 v4, v1, 0, vcc +; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc ; GCN-NEXT: buffer_store_dword v4, v3, s[0:3], 0 offen -; GCN-NEXT: v_cndmask_b32_e64 v3, v0, 1, vcc +; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v0, vcc ; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GCN-NEXT: v_cndmask_b32_e64 v4, 1, 0, vcc ; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen ; GCN-NEXT: buffer_store_byte v4, off, s[4:7], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll index 6512bee36e88b..275e2b99f3ca8 100644 --- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll @@ -47,9 +47,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, 64, v7 ; GFX9-NEXT: v_addc_co_u32_e64 v8, s[6:7], 0, 0, vcc -; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] ; GFX9-NEXT: v_ffbh_u32_e32 v10, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc ; GFX9-NEXT: v_ffbh_u32_e32 v7, v2 ; GFX9-NEXT: v_add_u32_e32 v7, 32, v7 ; GFX9-NEXT: v_min_u32_e32 v7, v7, v10 @@ -57,13 +57,13 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT: v_add_u32_e32 v10, 32, v10 ; GFX9-NEXT: v_ffbh_u32_e32 v11, v1 ; GFX9-NEXT: v_min_u32_e32 v10, v10, v11 -; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc ; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, 64, v10 ; GFX9-NEXT: v_addc_co_u32_e64 v11, s[6:7], 0, 0, vcc -; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v9, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v11, vcc ; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v7 ; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v8, v11, vcc ; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v9, vcc @@ -1518,9 +1518,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, 64, v9 ; GFX9-NEXT: v_addc_co_u32_e64 v10, s[6:7], 0, 0, vcc -; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] +; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] ; GFX9-NEXT: v_ffbh_u32_e32 v11, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc ; GFX9-NEXT: v_ffbh_u32_e32 v9, v2 ; GFX9-NEXT: v_add_u32_e32 v9, 32, v9 ; GFX9-NEXT: v_min_u32_e32 v9, v9, v11 @@ -1528,13 +1528,13 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-NEXT: v_add_u32_e32 v11, 32, v11 ; GFX9-NEXT: v_ffbh_u32_e32 v12, v1 ; GFX9-NEXT: v_min_u32_e32 v11, v11, v12 -; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc ; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, 64, v11 ; GFX9-NEXT: v_addc_co_u32_e64 v12, s[6:7], 0, 0, vcc -; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] ; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f -; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc ; 
GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, v8, v9 ; GFX9-NEXT: v_subb_co_u32_e32 v9, vcc, v10, v12, vcc ; GFX9-NEXT: v_mov_b32_e32 v11, 0 diff --git a/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll new file mode 100644 index 0000000000000..ceee518ab1c08 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/shrink-cndmask.ll @@ -0,0 +1,721 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefix=GCN +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=GCN + +;tests for integer 32 +define amdgpu_cs void @test_i32_sge(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_i32_sge: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, 1, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp sge i32 %a, 2 + %val1 = select i1 %vcc, i32 %p, i32 0 + %val2 = select i1 %vcc, i32 %q, i32 0 + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_i32_sle(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_i32_sle: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_gt_i32_e32 vcc_lo, 3, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp sle i32 %a, 2 + %val1 = select i1 %vcc, i32 %p, i32 0 + %val2 = select i1 %vcc, i32 %q, i32 0 + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_i32_sgt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_i32_sgt: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_le_i32_e32 vcc_lo, 2, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp sgt i32 2, %a + %val1 = select i1 %vcc, i32 0, i32 %p + %val2 = select i1 %vcc, i32 0, i32 %q + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_i32_slt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_i32_slt: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_ge_i32_e32 vcc_lo, 2, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp slt i32 2, %a + %val1 = select i1 %vcc, i32 0, i32 %p + %val2 = select i1 %vcc, i32 0, i32 %q + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +;tests for integer 64 +define amdgpu_cs void @test_i64_sge(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_i64_sge: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_lt_i64_e32 vcc_lo, 1, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: 
global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp sge i64 %a, 2 + %val1 = select i1 %vcc, i64 %p, i64 0 + %val2 = select i1 %vcc, i64 %q, i64 0 + %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0 + %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1 + store <2 x i64> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_i64_sle(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_i64_sle: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_gt_i64_e32 vcc_lo, 3, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp sle i64 %a, 2 + %val1 = select i1 %vcc, i64 %p, i64 0 + %val2 = select i1 %vcc, i64 %q, i64 0 + %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0 + %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1 + store <2 x i64> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_i64_sgt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_i64_sgt: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_le_i64_e32 vcc_lo, 2, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp sgt i64 2, %a + %val1 = select i1 %vcc, i64 0, i64 %p + %val2 = select i1 %vcc, i64 0, i64 %q + %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0 + %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1 + store <2 x i64> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_i64_slt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_i64_slt: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_ge_i64_e32 vcc_lo, 2, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp slt i64 2, %a + %val1 = select i1 %vcc, i64 0, i64 %p + %val2 = select i1 %vcc, i64 0, i64 %q + %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0 + %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1 + store <2 x i64> %ret1, ptr addrspace(1) %out + ret void +} + +;tests for unsigned 32 +define amdgpu_cs void @test_u32_eq(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u32_eq: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 1, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp eq i32 1, %a + %val1 = select i1 %vcc, i32 0, i32 %p + %val2 = select i1 %vcc, i32 0, i32 %q + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_negative_case(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_negative_case: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo +; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp eq i32 %a, -1 + %val1 = select i1 
%vcc, i32 %p, i32 0 + %val2 = select i1 %vcc, i32 0, i32 %q + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_mixed(i32 %a, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) { +; GCN-LABEL: test_mixed: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo +; GCN-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc_lo +; GCN-NEXT: v_dual_cndmask_b32 v2, 0, v3 :: v_dual_cndmask_b32 v3, 0, v4 +; GCN-NEXT: global_store_b128 v[5:6], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp eq i32 -1, %a + %val1 = select i1 %vcc, i32 0, i32 %p + %val2 = select i1 %vcc, i32 %q, i32 0 + %val3 = select i1 %vcc, i32 0, i32 %r + %val4 = select i1 %vcc, i32 0, i32 %s + %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1 + %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2 + %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3 + store <4 x i32> %ret3, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_sgpr(i32 %a, i32 %p, i32 inreg %q, i32 inreg %r, ptr addrspace(1) %out) { +; GCN-LABEL: test_sgpr: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0 +; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc_lo +; GCN-NEXT: v_cndmask_b32_e64 v5, s0, 0, vcc_lo +; GCN-NEXT: v_cndmask_b32_e64 v6, s1, 0, vcc_lo +; GCN-NEXT: global_store_b96 v[2:3], v[4:6], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp eq i32 %a, -1 + %val1 = select i1 %vcc, i32 %p, i32 0 + %val2 = select i1 %vcc, i32 0, i32 %q + %val3 = select i1 %vcc, i32 0, i32 %r + %ret0 = insertelement <3 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <3 x i32> %ret0, i32 %val2, i32 1 + %ret2 = insertelement <3 x i32> %ret1, i32 %val3, i32 2 + store <3 x i32> %ret2, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_u32_ne(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u32_ne: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp ne i32 1, %a + %val1 = select i1 %vcc, i32 0, i32 %p + %val2 = select i1 %vcc, i32 0, i32 %q + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_u32_uge(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u32_uge: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_lt_u32_e32 vcc_lo, 1, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp uge i32 %a, 2 + %val1 = select i1 %vcc, i32 %p, i32 0 + %val2 = select i1 %vcc, i32 %q, i32 0 + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_u32_ule(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u32_ule: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_gt_u32_e32 vcc_lo, 3, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; 
GCN-NEXT: s_endpgm +.entry: + %vcc = icmp ule i32 %a, 2 + %val1 = select i1 %vcc, i32 %p, i32 0 + %val2 = select i1 %vcc, i32 %q, i32 0 + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_u32_ugt(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u32_ugt: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_le_u32_e32 vcc_lo, 2, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp ugt i32 2, %a + %val1 = select i1 %vcc, i32 0, i32 %p + %val2 = select i1 %vcc, i32 0, i32 %q + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_u32_ult(i32 %a, i32 %p, i32 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u32_ult: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_ge_u32_e32 vcc_lo, 2, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp ult i32 2, %a + %val1 = select i1 %vcc, i32 0, i32 %p + %val2 = select i1 %vcc, i32 0, i32 %q + %ret0 = insertelement <2 x i32> poison, i32 %val1, i32 0 + %ret1 = insertelement <2 x i32> %ret0, i32 %val2, i32 1 + store <2 x i32> %ret1, ptr addrspace(1) %out + ret void +} + +;tests for unsigned 64 +define amdgpu_cs void @test_u64_eq(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u64_eq: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_ne_u64_e32 vcc_lo, 1, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp eq i64 1, %a + %val1 = select i1 %vcc, i64 0, i64 %p + %val2 = select i1 %vcc, i64 0, i64 %q + %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0 + %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1 + store <2 x i64> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_u64_ne(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u64_ne: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, 1, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp ne i64 1, %a + %val1 = select i1 %vcc, i64 0, i64 %p + %val2 = select i1 %vcc, i64 0, i64 %q + %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0 + %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1 + store <2 x i64> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_u64_uge(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u64_uge: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_lt_u64_e32 vcc_lo, 1, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp uge i64 %a, 2 + %val1 = select i1 %vcc, i64 %p, i64 0 + %val2 = select i1 %vcc, i64 %q, i64 0 + %ret0 = insertelement 
<2 x i64> poison, i64 %val1, i64 0 + %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1 + store <2 x i64> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_u64_ule(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u64_ule: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_gt_u64_e32 vcc_lo, 3, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp ule i64 %a, 2 + %val1 = select i1 %vcc, i64 %p, i64 0 + %val2 = select i1 %vcc, i64 %q, i64 0 + %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0 + %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1 + store <2 x i64> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_u64_ugt(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u64_ugt: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_le_u64_e32 vcc_lo, 2, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp ugt i64 2, %a + %val1 = select i1 %vcc, i64 0, i64 %p + %val2 = select i1 %vcc, i64 0, i64 %q + %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0 + %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1 + store <2 x i64> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_u64_ult(i64 %a, i64 %p, i64 %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_u64_ult: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_ge_u64_e32 vcc_lo, 2, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = icmp ult i64 2, %a + %val1 = select i1 %vcc, i64 0, i64 %p + %val2 = select i1 %vcc, i64 0, i64 %q + %ret0 = insertelement <2 x i64> poison, i64 %val1, i64 0 + %ret1 = insertelement <2 x i64> %ret0, i64 %val2, i64 1 + store <2 x i64> %ret1, ptr addrspace(1) %out + ret void +} + +;tests for float 32 +define amdgpu_cs void @test_f32_oeq(float %a, float %p, float %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f32_oeq: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_neq_f32_e32 vcc_lo, 2.0, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp oeq float %a, 2.0 + %val1 = select i1 %vcc, float 0.0, float %p + %val2 = select i1 %vcc, float 0.0, float %q + %ret0 = insertelement <2 x float> poison, float %val1, i32 0 + %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1 + store <2 x float> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f32_negative_modifiers(float %a, float %p, float %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f32_negative_modifiers: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_eq_f32_e32 vcc_lo, 2.0, v0 +; GCN-NEXT: v_cndmask_b32_e64 v0, -v1, 0, vcc_lo +; GCN-NEXT: v_cndmask_b32_e64 v1, -v2, 0, vcc_lo +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %r = fneg float %p + %s = fneg float %q + %vcc = fcmp oeq float 2.0, %a + %val1 = select i1 %vcc, float 0.0, float %r + %val2 = select i1 %vcc, float 0.0, float %s + %ret0 = insertelement 
<2 x float> poison, float %val1, i32 0 + %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1 + store <2 x float> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f32_one(float %a, float %p, float %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f32_one: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 2.0, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp one float %a, 2.0 + %val1 = select i1 %vcc, float 0.0, float %p + %val2 = select i1 %vcc, float 0.0, float %q + %ret0 = insertelement <2 x float> poison, float %val1, i32 0 + %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1 + store <2 x float> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f32_ord(float %a, float %p, float %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f32_ord: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp ord float %a, 2.0 + %val1 = select i1 %vcc, float 0.0, float %p + %val2 = select i1 %vcc, float 0.0, float %q + %ret0 = insertelement <2 x float> poison, float %val1, i32 0 + %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1 + store <2 x float> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f32_uno(float %a, float %p, float %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f32_uno: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp uno float %a, 2.0 + %val1 = select i1 %vcc, float 0.0, float %p + %val2 = select i1 %vcc, float 0.0, float %q + %ret0 = insertelement <2 x float> poison, float %val1, i32 0 + %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1 + store <2 x float> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f32_oge(float %a, float %p, float %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f32_oge: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_nge_f32_e32 vcc_lo, 2.0, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp oge float 2.0, %a + %val1 = select i1 %vcc, float 0.0, float %p + %val2 = select i1 %vcc, float 0.0, float %q + %ret0 = insertelement <2 x float> poison, float %val1, i32 0 + %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1 + store <2 x float> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f32_ole(float %a, float %p, float %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f32_ole: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_nle_f32_e32 vcc_lo, 2.0, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp ole float 2.0, %a + %val1 = select i1 %vcc, float 0.0, float %p + %val2 = select i1 %vcc, float 0.0, float %q + %ret0 = insertelement <2 x float> poison, float %val1, i32 0 + %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1 + store <2 x float> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f32_ogt(float %a, float %p, float %q, ptr 
addrspace(1) %out) { +; GCN-LABEL: test_f32_ogt: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 2.0, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp ogt float 2.0, %a + %val1 = select i1 %vcc, float 0.0, float %p + %val2 = select i1 %vcc, float 0.0, float %q + %ret0 = insertelement <2 x float> poison, float %val1, i32 0 + %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1 + store <2 x float> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f32_olt(float %a, float %p, float %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f32_olt: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 2.0, v0 +; GCN-NEXT: v_dual_cndmask_b32 v0, 0, v1 :: v_dual_cndmask_b32 v1, 0, v2 +; GCN-NEXT: global_store_b64 v[3:4], v[0:1], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp olt float 2.0, %a + %val1 = select i1 %vcc, float 0.0, float %p + %val2 = select i1 %vcc, float 0.0, float %q + %ret0 = insertelement <2 x float> poison, float %val1, i32 0 + %ret1 = insertelement <2 x float> %ret0, float %val2, i32 1 + store <2 x float> %ret1, ptr addrspace(1) %out + ret void +} + +;tests for float64 +define amdgpu_cs void @test_f64_oeq(double %a, double %p, double %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f64_oeq: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_neq_f64_e32 vcc_lo, 2.0, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp oeq double 2.0, %a + %val1 = select i1 %vcc, double 0.0, double %p + %val2 = select i1 %vcc, double 0.0, double %q + %ret0 = insertelement <2 x double> poison, double %val1, i32 0 + %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1 + store <2 x double> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f64_one(double %a, double %p, double %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f64_one: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_nlg_f64_e32 vcc_lo, 2.0, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp one double 2.0, %a + %val1 = select i1 %vcc, double 0.0, double %p + %val2 = select i1 %vcc, double 0.0, double %q + %ret0 = insertelement <2 x double> poison, double %val1, i32 0 + %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1 + store <2 x double> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f64_oge(double %a, double %p, double %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f64_oge: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_nge_f64_e32 vcc_lo, 2.0, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp oge double 2.0, %a + %val1 = select i1 %vcc, double 0.0, double %p + %val2 = select i1 %vcc, double 0.0, double %q + %ret0 = insertelement <2 x double> poison, double %val1, i32 0 + %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1 + store <2 x double> %ret1, ptr addrspace(1) %out + ret void +} + +define 
amdgpu_cs void @test_f64_ole(double %a, double %p, double %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f64_ole: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_nle_f64_e32 vcc_lo, 2.0, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp ole double 2.0, %a + %val1 = select i1 %vcc, double 0.0, double %p + %val2 = select i1 %vcc, double 0.0, double %q + %ret0 = insertelement <2 x double> poison, double %val1, i32 0 + %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1 + store <2 x double> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f64_ogt(double %a, double %p, double %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f64_ogt: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_ngt_f64_e32 vcc_lo, 2.0, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp ogt double 2.0, %a + %val1 = select i1 %vcc, double 0.0, double %p + %val2 = select i1 %vcc, double 0.0, double %q + %ret0 = insertelement <2 x double> poison, double %val1, i32 0 + %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1 + store <2 x double> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f64_olt(double %a, double %p, double %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f64_olt: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_nlt_f64_e32 vcc_lo, 2.0, v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp olt double 2.0, %a + %val1 = select i1 %vcc, double 0.0, double %p + %val2 = select i1 %vcc, double 0.0, double %q + %ret0 = insertelement <2 x double> poison, double %val1, i32 0 + %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1 + store <2 x double> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f64_ord(double %a, double %p, double %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f64_ord: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp ord double 2.0, %a + %val1 = select i1 %vcc, double 0.0, double %p + %val2 = select i1 %vcc, double 0.0, double %q + %ret0 = insertelement <2 x double> poison, double %val1, i32 0 + %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1 + store <2 x double> %ret1, ptr addrspace(1) %out + ret void +} + +define amdgpu_cs void @test_f64_uno(double %a, double %p, double %q, ptr addrspace(1) %out) { +; GCN-LABEL: test_f64_uno: +; GCN: ; %bb.0: ; %.entry +; GCN-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[0:1] +; GCN-NEXT: v_dual_cndmask_b32 v1, 0, v3 :: v_dual_cndmask_b32 v0, 0, v2 +; GCN-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 +; GCN-NEXT: global_store_b128 v[6:7], v[0:3], off +; GCN-NEXT: s_endpgm +.entry: + %vcc = fcmp uno double 2.0, %a + %val1 = select i1 %vcc, double 0.0, double %p + %val2 = select i1 %vcc, 
double 0.0, double %q + %ret0 = insertelement <2 x double> poison, double %val1, i32 0 + %ret1 = insertelement <2 x double> %ret0, double %val2, i32 1 + store <2 x double> %ret1, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index d71d0f78fe1c3..748f626bb55a7 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -382,9 +382,9 @@ define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v4, 0xbff00000 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GCN-NEXT: v_mov_b32_e32 v3, 0 -; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll index 79adc9ead62e1..867ad96fd5ed0 100644 --- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll @@ -696,9 +696,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) { ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v0, v2 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc -; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] -; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc +; GFX6-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, -1, v3, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_uaddsat_i64: @@ -706,9 +706,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v2 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc -; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc +; GFX8-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, -1, v3, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_uaddsat_i64: @@ -716,9 +716,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc -; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc +; GFX9-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, -1, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_uaddsat_i64: @@ -726,9 +726,9 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo -; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc_lo +; GFX10-NEXT: v_cmp_ge_u64_e32 vcc_lo, v[2:3], v[0:1] +; GFX10-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, -1, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_uaddsat_i64: @@ -737,9 +737,8 @@ 
define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) { ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo -; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc_lo +; GFX11-NEXT: v_cmp_ge_u64_e32 vcc_lo, v[2:3], v[0:1] +; GFX11-NEXT: v_dual_cndmask_b32 v0, -1, v2 :: v_dual_cndmask_b32 v1, -1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %result diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index 97738a7944741..ec832056b0b55 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -614,9 +614,9 @@ define void @v_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v4, 0x3ff00000 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GCN-NEXT: v_mov_b32_e32 v3, 0 -; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll index 90491a07289a0..9e2d1eac3277c 100644 --- a/llvm/test/CodeGen/AMDGPU/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll @@ -732,9 +732,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) { ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v0, v2 ; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc -; GFX6-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc +; GFX6-NEXT: v_cmp_le_u64_e32 vcc, v[2:3], v[0:1] +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_usubsat_i64: @@ -742,9 +742,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v0, v2 ; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc -; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc +; GFX8-NEXT: v_cmp_le_u64_e32 vcc, v[2:3], v[0:1] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_usubsat_i64: @@ -752,9 +752,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2 ; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc -; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc +; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[2:3], v[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_usubsat_i64: @@ -762,9 +762,9 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v1, v3, 
vcc_lo -; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo +; GFX10-NEXT: v_cmp_le_u64_e32 vcc_lo, v[2:3], v[0:1] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_usubsat_i64: @@ -772,9 +772,8 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 ; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo -; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo +; GFX11-NEXT: v_cmp_le_u64_e32 vcc_lo, v[2:3], v[0:1] +; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v2 :: v_dual_cndmask_b32 v1, 0, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %result diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll index a41063f467d01..1ef4164a93b9f 100644 --- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll +++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll @@ -2043,9 +2043,9 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] -; SI-NEXT: v_cmp_nle_f32_e32 vcc, 4.0, v2 -; SI-NEXT: v_cndmask_b32_e64 v2, v3, -1.0, vcc -; SI-NEXT: v_cndmask_b32_e64 v3, v3, -2.0, vcc +; SI-NEXT: v_cmp_le_f32_e32 vcc, 4.0, v2 +; SI-NEXT: v_cndmask_b32_e32 v2, -1.0, v3, vcc +; SI-NEXT: v_cndmask_b32_e32 v3, -2.0, v3, vcc ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64 @@ -2071,9 +2071,9 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v4 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_cmp_nle_f32_e32 vcc, 4.0, v5 -; VI-NEXT: v_cndmask_b32_e64 v3, v2, -1.0, vcc -; VI-NEXT: v_cndmask_b32_e64 v2, v2, -2.0, vcc +; VI-NEXT: v_cmp_le_f32_e32 vcc, 4.0, v5 +; VI-NEXT: v_cndmask_b32_e32 v3, -1.0, v2, vcc +; VI-NEXT: v_cndmask_b32_e32 v2, -2.0, v2, vcc ; VI-NEXT: flat_store_dword v[0:1], v3 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 @@ -2091,9 +2091,9 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v2, v0, s[6:7] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_nle_f32_e32 vcc, 4.0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, -1.0, vcc -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, -2.0, vcc +; GFX10-NEXT: v_cmp_le_f32_e32 vcc, 4.0, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v1, -1.0, v2, vcc +; GFX10-NEXT: v_cndmask_b32_e32 v2, -2.0, v2, vcc ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_dword v0, v2, s[0:1] @@ -2113,9 +2113,9 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_cmp_nle_f32_e32 vcc, 4.0, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, -1.0, vcc -; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, -2.0, vcc +; GFX11-NEXT: v_cmp_le_f32_e32 
vcc, 4.0, v1 +; GFX11-NEXT: v_cndmask_b32_e32 v1, -1.0, v2, vcc +; GFX11-NEXT: v_cndmask_b32_e32 v2, -2.0, v2, vcc ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_store_b32 v0, v2, s[0:1] dlc @@ -2135,9 +2135,9 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_cmp_nle_f32_e32 vcc, 4.0, v1 -; GFX12-NEXT: v_cndmask_b32_e64 v1, v2, -1.0, vcc -; GFX12-NEXT: v_cndmask_b32_e64 v2, v2, -2.0, vcc +; GFX12-NEXT: v_cmp_le_f32_e32 vcc, 4.0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v1, -1.0, v2, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v2, -2.0, v2, vcc ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: global_store_b32 v0, v2, s[0:1] scope:SCOPE_SYS diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll index 41fad10051dac..6c71ae5c4d0e8 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmaximum.ll @@ -1729,50 +1729,50 @@ define double @test_vector_reduce_fmaximum_v2double(<2 x double> %v) { ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_vector_reduce_fmaximum_v2double: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: test_vector_reduce_fmaximum_v2double: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_vector_reduce_fmaximum_v2double: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: test_vector_reduce_fmaximum_v2double: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[4:5], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_vector_reduce_fmaximum_v2double: @@ -1794,28 +1794,28 @@ define double @test_vector_reduce_fmaximum_v3double(<3 x double> %v) { ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_vector_reduce_fmaximum_v3double: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: test_vector_reduce_fmaximum_v3double: @@ -1823,44 +1823,44 @@ define double @test_vector_reduce_fmaximum_v3double(<3 x double> %v) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_vector_reduce_fmaximum_v3double: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[6:7], 
v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v7, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: test_vector_reduce_fmaximum_v3double: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v7, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_vector_reduce_fmaximum_v3double: @@ -1884,36 +1884,36 @@ define double @test_vector_reduce_fmaximum_v4double(<4 x double> %v) { ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[8:9], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v10, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v10, v9, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_vector_reduce_fmaximum_v4double: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[8:9], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v10, 0x7ff80000 -; 
GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v10, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: test_vector_reduce_fmaximum_v4double: @@ -1921,58 +1921,58 @@ define double @test_vector_reduce_fmaximum_v4double(<4 x double> %v) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[8:9], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v10, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_vector_reduce_fmaximum_v4double: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[8:9], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 
s[30:31] ; ; GFX11-LABEL: test_vector_reduce_fmaximum_v4double: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[8:9], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_vector_reduce_fmaximum_v4double: @@ -1997,68 +1997,68 @@ define double @test_vector_reduce_fmaximum_v8double(<8 x double> %v) { ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[16:17], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v18, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v17, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11] +; GFX7-NEXT: 
v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[12:13] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[14:15] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15] -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_vector_reduce_fmaximum_v8double: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[16:17], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v18, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[12:13] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[14:15] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: test_vector_reduce_fmaximum_v8double: @@ -2066,116 +2066,116 @@ define double @test_vector_reduce_fmaximum_v8double(<8 x double> %v) { 
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_max_f64 v[16:17], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v18, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v17, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[8:9] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[10:11] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[12:13] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[14:15] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_vector_reduce_fmaximum_v8double: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[16:17], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, 
v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[8:9] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[10:11] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[12:13] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[14:15] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: test_vector_reduce_fmaximum_v8double: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[16:17], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, 
v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[8:9] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[10:11] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[12:13] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[14:15] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_vector_reduce_fmaximum_v8double: @@ -2206,136 +2206,136 @@ define double @test_vector_reduce_fmaximum_v16double(<16 x double> %v) { ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_max_f64 v[31:32], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v33, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v32, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v31, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v32, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v31, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] -; GFX7-NEXT: 
v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[12:13] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[14:15] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[16:17] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[18:19] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[18:19] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[18:19] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[20:21] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[20:21] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[20:21] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[22:23] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[22:23] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[22:23] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[24:25] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[24:25] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[24:25] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[26:27] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[26:27] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[26:27] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[28:29] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[28:29] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, 
v[0:1], v[28:29] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_max_f64 v[2:3], v[0:1], v[30:31] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[30:31] -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[30:31] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_vector_reduce_fmaximum_v16double: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_max_f64 v[31:32], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v33, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v32, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v31, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v32, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v31, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[12:13] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[14:15] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[16:17] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[18:19] -; GFX8-NEXT: 
v_cmp_u_f64_e32 vcc, v[0:1], v[18:19] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[18:19] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[20:21] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[20:21] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[20:21] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[22:23] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[22:23] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[22:23] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[24:25] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[24:25] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[24:25] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[26:27] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[26:27] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[26:27] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[28:29] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[28:29] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[28:29] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_max_f64 v[2:3], v[0:1], v[30:31] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[30:31] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[30:31] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: test_vector_reduce_fmaximum_v16double: @@ -2344,237 +2344,237 @@ define double @test_vector_reduce_fmaximum_v16double(<16 x double> %v) { ; GFX9-NEXT: scratch_load_dword v31, off, s32 ; GFX9-NEXT: v_max_f64 v[32:33], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v33, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v33, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v32, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX9-NEXT: s_nop 1 -; 
GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[8:9] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[10:11] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[12:13] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[14:15] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[16:17] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[18:19] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[18:19] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[18:19] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[20:21] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[20:21] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[20:21] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[22:23] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[22:23] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[22:23] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[24:25] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[24:25] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[24:25] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[26:27] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, 
v[0:1], v[26:27] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[26:27] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[28:29] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[28:29] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[28:29] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[30:31] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[30:31] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[30:31] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_vector_reduce_fmaximum_v16double: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f64 v[31:32], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v31, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v32, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v31, vcc_lo ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[8:9] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[10:11] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[12:13] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; 
GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[14:15] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[18:19] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[18:19] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[20:21] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[20:21] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[22:23] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[22:23] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[24:25] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[24:25] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[26:27] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[26:27] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[28:29] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[28:29] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_max_f64 v[2:3], v[0:1], v[30:31] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[30:31] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: 
test_vector_reduce_fmaximum_v16double: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_max_f64 v[31:32], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v31, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v32, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v31, vcc_lo ; GFX11-NEXT: scratch_load_b32 v31, off, s32 ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[8:9] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[10:11] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[12:13] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[14:15] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | 
instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[16:17] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[18:19] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[18:19] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[20:21] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[20:21] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[22:23] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[22:23] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[24:25] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[24:25] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[26:27] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[26:27] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[28:29] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[28:29] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 
0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_waitcnt vmcnt(0)
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_max_f64 v[2:3], v[0:1], v[30:31]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[30:31]
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fmaximum_v16double:
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
index 61819a85dd82c..c36400fb65988 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fminimum.ll
@@ -1978,50 +1978,50 @@ define double @test_vector_reduce_fminimum_v2double(<2 x double> %v) {
 ; GFX7: ; %bb.0: ; %entry
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX8: ; %bb.0: ; %entry
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX9: ; %bb.0: ; %entry
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
 ; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v2double:
 ; GFX11: ; %bb.0: ; %entry
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT: v_min_f64 v[4:5], v[0:1], v[2:3]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v5, vcc_lo
 ; GFX11-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: test_vector_reduce_fminimum_v2double:
@@ -2043,28 +2043,28 @@ define double @test_vector_reduce_fminimum_v3double(<3 x double> %v) {
 ; GFX7: ; %bb.0: ; %entry
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v3double:
 ; GFX8: ; %bb.0: ; %entry
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v3double:
@@ -2072,44 +2072,44 @@ define double @test_vector_reduce_fminimum_v3double(<3 x double> %v) {
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
 ; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v3, vcc
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v3double:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v7, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: test_vector_reduce_fminimum_v3double: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v7, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v7, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_vector_reduce_fminimum_v3double: @@ -2133,36 +2133,36 @@ define double @test_vector_reduce_fminimum_v4double(<4 x double> %v) { ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_min_f64 v[8:9], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v10, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v10, v9, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_vector_reduce_fminimum_v4double: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_min_f64 v[8:9], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v10, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v10, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc 
; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: test_vector_reduce_fminimum_v4double: @@ -2170,58 +2170,58 @@ define double @test_vector_reduce_fminimum_v4double(<4 x double> %v) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_min_f64 v[8:9], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v10, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_vector_reduce_fminimum_v4double: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_min_f64 v[8:9], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc_lo ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: test_vector_reduce_fminimum_v4double: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_min_f64 v[8:9], v[0:1], v[2:3] -; 
GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v1, v9, 0x7ff80000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v9, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v8, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7] -; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] +; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v4double: @@ -2259,68 +2259,68 @@ define double @test_vector_reduce_fminimum_v8double(<8 x double> %v) { ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_min_f64 v[16:17], v[0:1], v[2:3] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX7-NEXT: v_mov_b32_e32 v18, 0x7ff80000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v17, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[8:9] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[10:11] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[12:13] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13] -; GFX7-NEXT: 
v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[14:15] -; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15] -; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc +; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15] +; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_vector_reduce_fminimum_v8double: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_min_f64 v[16:17], v[0:1], v[2:3] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX8-NEXT: v_mov_b32_e32 v18, 0x7ff80000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v17, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[8:9] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[10:11] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[12:13] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13] -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[14:15] -; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc +; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15] +; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: test_vector_reduce_fminimum_v8double: @@ -2328,116 +2328,116 @@ define double @test_vector_reduce_fminimum_v8double(<8 x double> %v) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_min_f64 v[16:17], v[0:1], v[2:3] ; GFX9-NEXT: v_mov_b32_e32 v18, 0x7ff80000 -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] 
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v17, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[8:9] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[10:11] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[12:13] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[14:15] -; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15] +; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v18, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v18, v3, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_vector_reduce_fminimum_v8double: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_min_f64 v[16:17], v[0:1], v[2:3] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo +; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7] -; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7] -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo -; GFX10-NEXT: 
v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v8double:
 ; GFX11: ; %bb.0: ; %entry
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT: v_min_f64 v[16:17], v[0:1], v[2:3]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v17, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v17, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v16, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v8double:
@@ -2486,136 +2486,136 @@ define double @test_vector_reduce_fminimum_v16double(<16 x double> %v) {
 ; GFX7: ; %bb.0: ; %entry
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX7-NEXT: v_mov_b32_e32 v33, 0x7ff80000
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v32, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v31, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v32, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v31, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[18:19]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[20:21]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[22:23]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[24:25]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[26:27]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[28:29]
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX7-NEXT: s_waitcnt vmcnt(0)
 ; GFX7-NEXT: v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
+; GFX7-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[30:31]
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX7-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX8: ; %bb.0: ; %entry
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX8-NEXT: v_mov_b32_e32 v33, 0x7ff80000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v32, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v31, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v32, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v31, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[18:19]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[20:21]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[22:23]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[24:25]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[26:27]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[28:29]
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX8-NEXT: s_waitcnt vmcnt(0)
 ; GFX8-NEXT: v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v33, vcc
+; GFX8-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[30:31]
+; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e32 v1, v33, v3, vcc
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_vector_reduce_fminimum_v16double:
@@ -2624,237 +2624,237 @@ define double @test_vector_reduce_fminimum_v16double(<16 x double> %v) {
 ; GFX9-NEXT: scratch_load_dword v31, off, s32
 ; GFX9-NEXT: v_min_f64 v[32:33], v[0:1], v[2:3]
 ; GFX9-NEXT: v_mov_b32_e32 v34, 0x7ff80000
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[2:3]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v33, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v33, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v32, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[4:5]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[6:7]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[8:9]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[10:11]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[10:11]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[12:13]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[12:13]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[14:15]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[14:15]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[16:17]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[18:19]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[18:19]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[20:21]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[20:21]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[22:23]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[22:23]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[24:25]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[24:25]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[26:27]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[26:27]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[28:29]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[28:29]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[30:31]
+; GFX9-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[30:31]
 ; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v34, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v1, v34, v3, vcc
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v31, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v32, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v31, vcc_lo
 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[18:19]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[20:21]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[22:23]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[24:25]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[26:27]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29]
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[28:29]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX10-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX10-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[30:31]
+; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: test_vector_reduce_fminimum_v16double:
 ; GFX11: ; %bb.0: ; %entry
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT: v_min_f64 v[31:32], v[0:1], v[2:3]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[2:3]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[2:3]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v32, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v31, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v32, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v31, vcc_lo
 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[4:5]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[4:5]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[6:7]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[6:7]
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[6:7]
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[8:9]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[8:9]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[8:9]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[10:11]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[10:11]
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[10:11]
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[12:13]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[12:13]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[12:13]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[14:15]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[14:15]
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[14:15]
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[16:17]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[16:17]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[16:17]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[18:19]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[18:19]
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[18:19]
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[20:21]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[20:21]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[20:21]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[22:23]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[22:23]
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[22:23]
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[24:25]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[24:25]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[24:25]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[26:27]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[26:27]
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[26:27]
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[28:29]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[28:29]
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[28:29]
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_waitcnt vmcnt(0)
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_min_f64 v[2:3], v[0:1], v[30:31]
-; GFX11-NEXT: v_cmp_u_f64_e32 vcc_lo, v[0:1], v[30:31]
-; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
+; GFX11-NEXT: v_cmp_o_f64_e32 vcc_lo, v[0:1], v[30:31]
+; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0x7ff80000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7ff80000, v3, vcc_lo
 ; GFX11-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX12-SDAG-LABEL: test_vector_reduce_fminimum_v16double: