diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h index abffcd5dca16f..5ea68e0a64af9 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -77,12 +77,11 @@ namespace llvm { struct PhysRegSUOper { SUnit *SU; int OpIdx; - unsigned RegUnit; + unsigned Reg; - PhysRegSUOper(SUnit *su, int op, unsigned R) - : SU(su), OpIdx(op), RegUnit(R) {} + PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {} - unsigned getSparseSetIndex() const { return RegUnit; } + unsigned getSparseSetIndex() const { return Reg; } }; /// Use a SparseMultiSet to track physical registers. Storage is only diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index a42f842b70df7..37a1ef0c8d64d 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -211,8 +211,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { for (const MachineOperand &MO : ExitMI->all_uses()) { Register Reg = MO.getReg(); if (Reg.isPhysical()) { - for (MCRegUnit Unit : TRI->regunits(Reg)) - Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit)); + Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); } else if (Reg.isVirtual() && MO.readsReg()) { addVRegUseDeps(&ExitSU, MO.getOperandNo()); } @@ -223,11 +222,8 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { // uses all the registers that are livein to the successor blocks. for (const MachineBasicBlock *Succ : BB->successors()) { for (const auto &LI : Succ->liveins()) { - // TODO: Use LI.LaneMask to refine this. - for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) { - if (!Uses.contains(Unit)) - Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit)); - } + if (!Uses.contains(LI.PhysReg)) + Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg)); } } } @@ -248,8 +244,8 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc(); bool ImplicitPseudoDef = (OperIdx >= DefMIDesc.getNumOperands() && !DefMIDesc.hasImplicitDefOfPhysReg(Reg)); - for (MCRegUnit Unit : TRI->regunits(Reg)) { - for (Reg2SUnitsMap::iterator I = Uses.find(Unit); I != Uses.end(); ++I) { + for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) { + for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) { SUnit *UseSU = I->SU; if (UseSU == SU) continue; @@ -266,14 +262,11 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { // Set the hasPhysRegDefs only for physreg defs that have a use within // the scheduling region. SU->hasPhysRegDefs = true; - UseInstr = UseSU->getInstr(); - Register UseReg = UseInstr->getOperand(UseOpIdx).getReg(); const MCInstrDesc &UseMIDesc = UseInstr->getDesc(); - ImplicitPseudoUse = UseOpIdx >= ((int)UseMIDesc.getNumOperands()) && - !UseMIDesc.hasImplicitUseOfPhysReg(UseReg); - - Dep = SDep(SU, SDep::Data, UseReg); + ImplicitPseudoUse = (UseOpIdx >= ((int)UseMIDesc.getNumOperands()) && + !UseMIDesc.hasImplicitUseOfPhysReg(*Alias)); + Dep = SDep(SU, SDep::Data, *Alias); } if (!ImplicitPseudoDef && !ImplicitPseudoUse) { Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, @@ -307,16 +300,15 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // TODO: Using a latency of 1 here for output dependencies assumes // there's no cost for reusing registers. SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - for (MCRegUnit Unit : TRI->regunits(Reg)) { - for (Reg2SUnitsMap::iterator I = Defs.find(Unit); I != Defs.end(); ++I) { + for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) { + for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { SUnit *DefSU = I->SU; if (DefSU == &ExitSU) continue; MachineInstr *DefInstr = DefSU->getInstr(); - MachineOperand &DefMO = DefInstr->getOperand(I->OpIdx); - if (DefSU != SU && - (Kind != SDep::Output || !MO.isDead() || !DefMO.isDead())) { - SDep Dep(SU, Kind, DefMO.getReg()); + if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || + !DefInstr->registerDefIsDead(*Alias))) { + SDep Dep(SU, Kind, /*Reg=*/*Alias); if (Kind != SDep::Anti) { Dep.setLatency( SchedModel.computeOutputLatency(MI, OperIdx, DefInstr)); @@ -332,42 +324,37 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. - for (MCRegUnit Unit : TRI->regunits(Reg)) - Uses.insert(PhysRegSUOper(SU, OperIdx, Unit)); + Uses.insert(PhysRegSUOper(SU, OperIdx, Reg)); if (RemoveKillFlags) MO.setIsKill(false); } else { addPhysRegDataDeps(SU, OperIdx); // Clear previous uses and defs of this register and its subregisters. - for (MCRegUnit Unit : TRI->regunits(Reg)) { - Uses.eraseAll(Unit); + for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) { + Uses.eraseAll(SubReg); if (!MO.isDead()) - Defs.eraseAll(Unit); + Defs.eraseAll(SubReg); } - if (MO.isDead() && SU->isCall) { // Calls will not be reordered because of chain dependencies (see // below). Since call operands are dead, calls may continue to be added // to the DefList making dependence checking quadratic in the size of // the block. Instead, we leave only one call at the back of the // DefList. - for (MCRegUnit Unit : TRI->regunits(Reg)) { - Reg2SUnitsMap::RangePair P = Defs.equal_range(Unit); - Reg2SUnitsMap::iterator B = P.first; - Reg2SUnitsMap::iterator I = P.second; - for (bool isBegin = I == B; !isBegin; /* empty */) { - isBegin = (--I) == B; - if (!I->SU->isCall) - break; - I = Defs.erase(I); - } + Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); + Reg2SUnitsMap::iterator B = P.first; + Reg2SUnitsMap::iterator I = P.second; + for (bool isBegin = I == B; !isBegin; /* empty */) { + isBegin = (--I) == B; + if (!I->SU->isCall) + break; + I = Defs.erase(I); } } // Defs are pushed in the order they are visited and never reordered. - for (MCRegUnit Unit : TRI->regunits(Reg)) - Defs.insert(PhysRegSUOper(SU, OperIdx, Unit)); + Defs.insert(PhysRegSUOper(SU, OperIdx, Reg)); } } diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll index 667c561ea26f6..165eeb0704ea8 100644 --- a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll @@ -1363,11 +1363,11 @@ define amdgpu_kernel void @v_copysign_out_f16_mag_f16_sign_f64(ptr addrspace(1) ; VI-NEXT: flat_load_dwordx2 v[1:2], v[1:2] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s7 -; VI-NEXT: flat_load_ushort v3, v[0:1] ; VI-NEXT: s_movk_i32 s0, 0x7fff +; VI-NEXT: flat_load_ushort v3, v[0:1] +; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_bfi_b32 v2, s0, v3, v2 ; VI-NEXT: flat_store_short v[0:1], v2 diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll index f957e0368c426..8a7cdf36accb7 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -3788,13 +3788,13 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v33, 16, v2 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v34, 0xffff, v3 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v32, 0xffff, v2 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v39, 16, v1 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v37, 16, v0 ; GCN-NOHSA-VI-NEXT: buffer_store_dword v32, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-VI-NEXT: buffer_store_dword v33, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill ; GCN-NOHSA-VI-NEXT: buffer_store_dword v34, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill ; GCN-NOHSA-VI-NEXT: buffer_store_dword v35, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v39, 16, v1 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v37, 16, v0 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v38, 0xffff, v1 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v36, 0xffff, v0 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v29 diff --git a/llvm/test/CodeGen/AMDGPU/schedule-physregdeps.mir b/llvm/test/CodeGen/AMDGPU/schedule-physregdeps.mir index a6ff60af1604c..395a4f827ad6e 100644 --- a/llvm/test/CodeGen/AMDGPU/schedule-physregdeps.mir +++ b/llvm/test/CodeGen/AMDGPU/schedule-physregdeps.mir @@ -4,11 +4,15 @@ # CHECK: SU(0): $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec # CHECK: Successors: # CHECK-NEXT: SU(2): Out Latency=1 +# CHECK-NEXT: SU(4): Out Latency=1 # CHECK-NEXT: SU(2): Data Latency=1 Reg=$vgpr0 +# CHECK-NEXT: SU(4): Data Latency=1 Reg=$vgpr0_vgpr1 # CHECK: SU(1): $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec # CHECK: Successors: # CHECK-NEXT: SU(3): Out Latency=1 +# CHECK-NEXT: SU(4): Out Latency=1 # CHECK-NEXT: SU(3): Data Latency=1 Reg=$vgpr1 +# CHECK-NEXT: SU(4): Data Latency=1 Reg=$vgpr0_vgpr1 # CHECK: SU(2): $vgpr0 = V_ADD_CO_U32_e32 $sgpr2, $vgpr0, implicit-def $vcc, implicit $exec # CHECK: Predecessors: # CHECK-NEXT: SU(0): Out Latency=1 @@ -18,6 +22,7 @@ # CHECK-NEXT: SU(4): Data Latency=1 Reg=$vgpr0_vgpr1 # CHECK-NEXT: SU(3): Out Latency=1 # CHECK-NEXT: SU(3): Data Latency=1 Reg=$vcc +# CHECK-NEXT: SU(4): Anti Latency=0 # CHECK: SU(3): $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def dead $vcc, implicit $vcc, implicit $exec # CHECK: Predecessors: # CHECK-NEXT: SU(2): Out Latency=1 @@ -27,12 +32,19 @@ # CHECK: Successors: # CHECK-NEXT: SU(4): Out Latency=1 # CHECK-NEXT: SU(4): Data Latency=1 Reg=$vgpr0_vgpr1 +# CHECK-NEXT: SU(4): Anti Latency=0 # CHECK: SU(4): $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr # CHECK: Predecessors: # CHECK-NEXT: SU(3): Out Latency=1 # CHECK-NEXT: SU(3): Data Latency=1 Reg=$vgpr0_vgpr1 +# CHECK-NEXT: SU(3): Anti Latency=0 # CHECK-NEXT: SU(2): Out Latency=1 # CHECK-NEXT: SU(2): Data Latency=1 Reg=$vgpr0_vgpr1 +# CHECK-NEXT: SU(2): Anti Latency=0 +# CHECK-NEXT: SU(1): Out Latency=1 +# CHECK-NEXT: SU(1): Data Latency=1 Reg=$vgpr0_vgpr1 +# CHECK-NEXT: SU(0): Out Latency=1 +# CHECK-NEXT: SU(0): Data Latency=1 Reg=$vgpr0_vgpr1 # CHECK: Successors: # CHECK-NEXT: ExitSU: Ord Latency=3 Artificial diff --git a/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll b/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll index c678f4f2a37a0..595568bd9e055 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll @@ -530,28 +530,28 @@ define void @f32s8_0(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: v8.w = vasr(v8.w,r6) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v22 = vsplat(r3) +; CHECK-NEXT: v23 = vsplat(r3) ; CHECK-NEXT: v7.w = vasr(v7.w,r6) -; CHECK-NEXT: v19.w = vsub(v9.w,v1.w) +; CHECK-NEXT: v20.w = vsub(v9.w,v1.w) ; CHECK-NEXT: v8.w = vsub(v10.w,v8.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v20.w = vasl(v6.w,r2) -; CHECK-NEXT: v27 = vmux(q1,v1,v22) -; CHECK-NEXT: v25 = vmux(q0,v1,v22) +; CHECK-NEXT: v21.w = vasl(v6.w,r2) +; CHECK-NEXT: v28 = vmux(q1,v1,v23) +; CHECK-NEXT: v26 = vmux(q0,v1,v23) ; CHECK-NEXT: v7.w = vsub(v10.w,v7.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v5.w = vasl(v4.w,r2) ; CHECK-NEXT: v8.w = vmin(v8.w,v13.w) -; CHECK-NEXT: v9 = vor(v20,v1) -; CHECK-NEXT: v21.w = vmin(v7.w,v13.w) +; CHECK-NEXT: v9 = vor(v21,v1) +; CHECK-NEXT: v22.w = vmin(v7.w,v13.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v4.w = vasr(v19.w,r6) +; CHECK-NEXT: v4.w = vasr(v20.w,r6) ; CHECK-NEXT: q3 = vcmp.gt(v8.w,v12.w) ; CHECK-NEXT: v5 = vor(v5,v1) -; CHECK-NEXT: q2 = vcmp.gt(v21.w,v12.w) +; CHECK-NEXT: q2 = vcmp.gt(v22.w,v12.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v11.w = vasr(v11.w,r6) @@ -563,46 +563,46 @@ define void @f32s8_0(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: v4.w = vmin(v4.w,v13.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v23.w = vasl(v0.w,r2) +; CHECK-NEXT: v24.w = vasl(v0.w,r2) ; CHECK-NEXT: v3 = vor(v3,v1) ; CHECK-NEXT: v10.w = vmin(v10.w,v13.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v8.w = vlsr(v9.w,v8.w) -; CHECK-NEXT: v6 = vor(v23,v1) +; CHECK-NEXT: v6 = vor(v24,v1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v5.w = vlsr(v5.w,v21.w) -; CHECK-NEXT: v26.w = vsub(v12.w,v8.w) +; CHECK-NEXT: v5.w = vlsr(v5.w,v22.w) +; CHECK-NEXT: v27.w = vsub(v12.w,v8.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v3.w = vlsr(v3.w,v4.w) -; CHECK-NEXT: v24.w = vsub(v12.w,v5.w) -; CHECK-NEXT: v8 = vmux(q1,v26,v8) +; CHECK-NEXT: v25.w = vsub(v12.w,v5.w) +; CHECK-NEXT: v8 = vmux(q1,v27,v8) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v6.w = vlsr(v6.w,v10.w) -; CHECK-NEXT: v5 = vmux(q0,v24,v5) +; CHECK-NEXT: v5 = vmux(q0,v25,v5) ; CHECK-NEXT: q0 = vcmp.gt(v12.w,v2.w) -; CHECK-NEXT: v28.w = vsub(v12.w,v3.w) +; CHECK-NEXT: v29.w = vsub(v12.w,v3.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v2 = vmux(q3,v8,v27) -; CHECK-NEXT: v29.w = vsub(v12.w,v6.w) +; CHECK-NEXT: v2 = vmux(q3,v8,v28) ; CHECK-NEXT: q3 = vcmp.gt(v12.w,v0.w) -; CHECK-NEXT: v5 = vmux(q2,v5,v25) +; CHECK-NEXT: v30.w = vsub(v12.w,v6.w) +; CHECK-NEXT: v5 = vmux(q2,v5,v26) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30 = vmux(q0,v1,v22) -; CHECK-NEXT: v3 = vmux(q0,v28,v3) +; CHECK-NEXT: v0 = vmux(q0,v1,v23) +; CHECK-NEXT: v3 = vmux(q0,v29,v3) ; CHECK-NEXT: q2 = vcmp.gt(v4.w,v12.w) -; CHECK-NEXT: v31 = vmux(q3,v29,v6) +; CHECK-NEXT: v31 = vmux(q3,v30,v6) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v2.h = vpack(v2.w,v5.w):sat -; CHECK-NEXT: v1 = vmux(q3,v1,v22) +; CHECK-NEXT: v1 = vmux(q3,v1,v23) ; CHECK-NEXT: q3 = vcmp.gt(v10.w,v12.w) -; CHECK-NEXT: v0 = vmux(q2,v3,v30) +; CHECK-NEXT: v0 = vmux(q2,v3,v0) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v1 = vmux(q3,v31,v1) @@ -1581,7 +1581,7 @@ define void @f32u8_0(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: v12.w = vsub(v12.w,v3.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v21 = vsplat(r4) +; CHECK-NEXT: v23 = vsplat(r4) ; CHECK-NEXT: v8.w = vasr(v8.w,r6) ; CHECK-NEXT: v11.w = vsub(v11.w,v3.w) ; CHECK-NEXT: } @@ -1592,27 +1592,27 @@ define void @f32u8_0(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: { ; CHECK-NEXT: v6.w = vasl(v5.w,r2) ; CHECK-NEXT: v9.w = vsub(v14.w,v9.w) -; CHECK-NEXT: v8.w = vmin(v8.w,v21.w) +; CHECK-NEXT: v8.w = vmin(v8.w,v23.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v7.w = vasl(v2.w,r2) ; CHECK-NEXT: v6 = vor(v6,v3) -; CHECK-NEXT: v9.w = vmin(v9.w,v21.w) +; CHECK-NEXT: v9.w = vmin(v9.w,v23.w) ; CHECK-NEXT: q1 = vcmp.gt(v13.w,v8.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v20.w = vasr(v11.w,r6) +; CHECK-NEXT: v22.w = vasr(v11.w,r6) ; CHECK-NEXT: v7 = vor(v7,v3) ; CHECK-NEXT: q2 = vcmp.gt(v13.w,v9.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v12.w = vasr(v12.w,r6) -; CHECK-NEXT: v5.w = vsub(v14.w,v20.w) +; CHECK-NEXT: v5.w = vsub(v14.w,v22.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v4.w = vasl(v1.w,r2) -; CHECK-NEXT: v22.w = vsub(v14.w,v12.w) -; CHECK-NEXT: v5.w = vmin(v5.w,v21.w) +; CHECK-NEXT: v24.w = vsub(v14.w,v12.w) +; CHECK-NEXT: v5.w = vmin(v5.w,v23.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: r2 = ##2147483647 @@ -1620,42 +1620,42 @@ define void @f32u8_0(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: v4 = vor(v4,v3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v23 = vsplat(r2) +; CHECK-NEXT: v25 = vsplat(r2) ; CHECK-NEXT: v6.w = vlsr(v6.w,v8.w) ; CHECK-NEXT: v3 = vor(v10,v3) -; CHECK-NEXT: v10.w = vmin(v22.w,v21.w) +; CHECK-NEXT: v10.w = vmin(v24.w,v23.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v7.w = vlsr(v7.w,v9.w) -; CHECK-NEXT: v25 = vmux(q1,v23,v6) +; CHECK-NEXT: v27 = vmux(q1,v25,v6) ; CHECK-NEXT: q1 = vcmp.gt(v13.w,v5.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v24.w = vlsr(v4.w,v5.w) -; CHECK-NEXT: v26 = vmux(q2,v23,v7) +; CHECK-NEXT: v26.w = vlsr(v4.w,v5.w) +; CHECK-NEXT: v28 = vmux(q2,v25,v7) ; CHECK-NEXT: q2 = vcmp.gt(v13.w,v10.w) -; CHECK-NEXT: v4 = vmux(q0,v13,v25) +; CHECK-NEXT: v4 = vmux(q0,v13,v27) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v3.w = vlsr(v3.w,v10.w) -; CHECK-NEXT: v27 = vmux(q3,v13,v26) -; CHECK-NEXT: v2 = vmux(q1,v23,v24) +; CHECK-NEXT: v29 = vmux(q3,v13,v28) +; CHECK-NEXT: v2 = vmux(q1,v25,v26) ; CHECK-NEXT: q1 = vcmp.gt(v13.w,v1.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v28 = vmux(q2,v23,v3) ; CHECK-NEXT: q3 = vcmp.gt(v13.w,v0.w) -; CHECK-NEXT: v29 = vmux(q1,v13,v2) +; CHECK-NEXT: v1 = vmux(q2,v25,v3) +; CHECK-NEXT: v0 = vmux(q1,v13,v2) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30.uh = vpack(v27.w,v4.w):sat -; CHECK-NEXT: v1 = vmux(q3,v13,v28) +; CHECK-NEXT: v30.uh = vpack(v29.w,v4.w):sat +; CHECK-NEXT: v1 = vmux(q3,v13,v1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v31.uh = vpack(v1.w,v29.w):sat +; CHECK-NEXT: v31.uh = vpack(v1.w,v0.w):sat ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.uh = vpack(v1.w,v29.w):sat +; CHECK-NEXT: v0.uh = vpack(v1.w,v0.w):sat ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v1.ub = vpack(v31.h,v30.h):sat diff --git a/llvm/test/CodeGen/Hexagon/autohvx/int-to-fp.ll b/llvm/test/CodeGen/Hexagon/autohvx/int-to-fp.ll index 81f8c02ea4af0..369e9a958b44d 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/int-to-fp.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/int-to-fp.ll @@ -1065,7 +1065,7 @@ define void @s32f16_0(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: v6.w = vadd(v6.w,v4.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v24 = vsplat(r4) +; CHECK-NEXT: v27 = vsplat(r4) ; CHECK-NEXT: r5 = ##-2147483648 ; CHECK-NEXT: v5.w = vadd(v5.w,v4.w) ; CHECK-NEXT: } @@ -1076,7 +1076,7 @@ define void @s32f16_0(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v2.w = vasl(v2.w,v5.w) -; CHECK-NEXT: v23 = vmux(q0,v13,v7) +; CHECK-NEXT: v26 = vmux(q0,v13,v7) ; CHECK-NEXT: v10.w = vadd(v3.w,v8.w) ; CHECK-NEXT: v11 = vand(v3,v9) ; CHECK-NEXT: } @@ -1087,72 +1087,74 @@ define void @s32f16_0(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: q2 = vcmp.gt(v3.uw,v10.uw) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v18.uw = vlsr(v10.uw,r3) +; CHECK-NEXT: v12.uw = vlsr(v3.uw,r3) ; CHECK-NEXT: q3 = vcmp.eq(v9.w,v7.w) -; CHECK-NEXT: v19 = vmux(q1,v7,v4) +; CHECK-NEXT: v22 = vmux(q1,v7,v4) ; CHECK-NEXT: q1 = vcmp.gt(v2.uw,v8.uw) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v8.uw = vlsr(v8.uw,r3) -; CHECK-NEXT: v9.w = vadd(v18.w,v19.w) -; CHECK-NEXT: v21 = vmux(q3,v7,v4) -; CHECK-NEXT: v20 = vmux(q2,v4,v7) +; CHECK-NEXT: v3.uw = vlsr(v10.uw,r3) +; CHECK-NEXT: v24 = vmux(q3,v7,v4) +; CHECK-NEXT: v23 = vmux(q2,v4,v7) +; CHECK-NEXT: v4 = vmux(q1,v4,v7) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v12.uw = vlsr(v3.uw,r3) -; CHECK-NEXT: v4 = vmux(q1,v4,v7) -; CHECK-NEXT: v22.w = vadd(v8.w,v21.w) -; CHECK-NEXT: v6.w = vsub(v20.w,v6.w) +; CHECK-NEXT: v8.uw = vlsr(v8.uw,r3) +; CHECK-NEXT: v9.w = vadd(v3.w,v22.w) +; CHECK-NEXT: v6.w = vsub(v23.w,v6.w) +; CHECK-NEXT: v4.w = vsub(v4.w,v5.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: r3 = #23 ; CHECK-NEXT: v2.uw = vlsr(v2.uw,r3) -; CHECK-NEXT: v4.w = vsub(v4.w,v5.w) -; CHECK-NEXT: q3 = vcmp.eq(v12.w,v18.w) +; CHECK-NEXT: v25.w = vadd(v8.w,v24.w) +; CHECK-NEXT: q3 = vcmp.eq(v12.w,v3.w) +; CHECK-NEXT: v6.w = vadd(v6.w,v27.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v3.uw = vlsr(v18.uw,r2) -; CHECK-NEXT: v4.w = vadd(v4.w,v24.w) +; CHECK-NEXT: r3 = #23 +; CHECK-NEXT: v3.uw = vlsr(v3.uw,r2) ; CHECK-NEXT: q2 = vcmp.eq(v2.w,v8.w) -; CHECK-NEXT: v6.w = vadd(v6.w,v24.w) +; CHECK-NEXT: v4.w = vadd(v4.w,v27.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v9.uw = vlsr(v9.uw,r2) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v25.uw = vlsr(v22.uw,r2) +; CHECK-NEXT: v28.uw = vlsr(v25.uw,r2) ; CHECK-NEXT: v3 = vmux(q3,v9,v3) ; CHECK-NEXT: q3 = vcmp.gt(v7.w,v0.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v26.uw = vlsr(v8.uw,r2) -; CHECK-NEXT: v28 = vmux(q3,v13,v7) +; CHECK-NEXT: v2.uw = vlsr(v8.uw,r2) +; CHECK-NEXT: v30 = vmux(q3,v13,v7) +; CHECK-NEXT: v3 = vor(v26,v3) ; CHECK-NEXT: q3 = vcmp.eq(v0.w,v7.w) -; CHECK-NEXT: v3 = vor(v23,v3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30.w = vasl(v4.w,r3) -; CHECK-NEXT: v2 = vmux(q2,v25,v26) +; CHECK-NEXT: v29.w = vasl(v6.w,r3) +; CHECK-NEXT: v2 = vmux(q2,v28,v2) ; CHECK-NEXT: q2 = vcmp.eq(v1.w,v7.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v27.w = vasl(v6.w,r3) -; CHECK-NEXT: v29 = vor(v28,v2) -; CHECK-NEXT: v31 = vmux(q2,v7,v31) +; CHECK-NEXT: v2.w = vasl(v4.w,r3) +; CHECK-NEXT: v31 = vor(v30,v2) +; CHECK-NEXT: v3 = vor(v3,v29) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1 = vor(v29,v30) -; CHECK-NEXT: v3 = vor(v3,v27) +; CHECK-NEXT: v1 = vor(v31,v2) +; CHECK-NEXT: v3 = vmux(q2,v7,v3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30.qf32 = vadd(v31.sf,v7.sf) ; CHECK-NEXT: v0 = vmux(q3,v7,v1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v31.qf32 = vadd(v0.sf,v7.sf) +; CHECK-NEXT: v2.qf32 = vadd(v3.sf,v7.sf) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.hf = v31:30.qf32 +; CHECK-NEXT: v3.qf32 = vadd(v0.sf,v7.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = v3:2.qf32 ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v0.h = vdeal(v0.h) @@ -1192,17 +1194,14 @@ define void @s32f16_1(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: v4.w = vadd(v4.w,v2.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v26 = vsplat(r5) -; CHECK-NEXT: v27 = vsplat(r4) +; CHECK-NEXT: v28 = vsplat(r5) +; CHECK-NEXT: v29 = vsplat(r4) ; CHECK-NEXT: q3 = vcmp.gt(v3.w,v0.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: r3 = #23 -; CHECK-NEXT: v31.qf32 = vadd(v3.sf,v3.sf) -; CHECK-NEXT: v29 = vmux(q3,v27,v3) -; CHECK-NEXT: } -; CHECK-NEXT: { ; CHECK-NEXT: v1.w = vasl(v1.w,v4.w) +; CHECK-NEXT: v31 = vmux(q3,v29,v3) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v5.w = vadd(v1.w,v5.w) @@ -1215,42 +1214,43 @@ define void @s32f16_1(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: r2 = #64 -; CHECK-NEXT: v24.uw = vlsr(v5.uw,r2) -; CHECK-NEXT: v25 = vmux(q0,v3,v2) +; CHECK-NEXT: v1.uw = vlsr(v5.uw,r2) +; CHECK-NEXT: v27 = vmux(q0,v3,v2) ; CHECK-NEXT: v2 = vmux(q1,v2,v3) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: q3 = vsetq(r2) -; CHECK-NEXT: v5.w = vadd(v24.w,v25.w) +; CHECK-NEXT: v5.w = vadd(v1.w,v27.w) ; CHECK-NEXT: v2.w = vsub(v2.w,v4.w) -; CHECK-NEXT: q2 = vcmp.eq(v7.w,v24.w) +; CHECK-NEXT: q2 = vcmp.eq(v7.w,v1.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1.uw = vlsr(v24.uw,r6) -; CHECK-NEXT: v2.w = vadd(v2.w,v26.w) +; CHECK-NEXT: v1.uw = vlsr(v1.uw,r6) +; CHECK-NEXT: v2.w = vadd(v2.w,v28.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v28.uw = vlsr(v5.uw,r6) +; CHECK-NEXT: v30.uw = vlsr(v5.uw,r6) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v2.w = vasl(v2.w,r3) -; CHECK-NEXT: v1 = vmux(q2,v28,v1) +; CHECK-NEXT: v1 = vmux(q2,v30,v1) ; CHECK-NEXT: q2 = vcmp.eq(v0.w,v3.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v1 = vor(v29,v1) +; CHECK-NEXT: v1 = vor(v31,v1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30 = vor(v1,v2) +; CHECK-NEXT: v1.qf32 = vadd(v3.sf,v3.sf) +; CHECK-NEXT: v0 = vor(v1,v2) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30 = vmux(q2,v3,v30) +; CHECK-NEXT: v0 = vmux(q2,v3,v0) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30.qf32 = vadd(v30.sf,v3.sf) +; CHECK-NEXT: v0.qf32 = vadd(v0.sf,v3.sf) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.hf = v31:30.qf32 +; CHECK-NEXT: v0.hf = v1:0.qf32 ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v0.h = vdeal(v0.h) @@ -2410,70 +2410,72 @@ define void @u32f16_0(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: q1 = vcmp.eq(v13.w,v9.w) ; CHECK-NEXT: } ; CHECK-NEXT: { +; CHECK-NEXT: v28.uw = vlsr(v11.uw,r3) ; CHECK-NEXT: q3 = vcmp.gt(v8.uw,v6.uw) ; CHECK-NEXT: q2 = vcmp.eq(v7.w,v9.w) -; CHECK-NEXT: v28 = vmux(q0,v4,v9) -; CHECK-NEXT: v27 = vmux(q1,v9,v4) +; CHECK-NEXT: v30 = vmux(q0,v4,v9) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v6.uw = vlsr(v6.uw,r3) -; CHECK-NEXT: v29 = vmux(q3,v4,v9) +; CHECK-NEXT: v29 = vmux(q1,v9,v4) +; CHECK-NEXT: v31 = vmux(q3,v4,v9) ; CHECK-NEXT: v4 = vmux(q2,v9,v4) -; CHECK-NEXT: q2 = vcmp.eq(v2.w,v9.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v26.uw = vlsr(v11.uw,r3) -; CHECK-NEXT: v1.w = vsub(v29.w,v1.w) +; CHECK-NEXT: v3.w = vsub(v30.w,v3.w) +; CHECK-NEXT: v7.w = vadd(v28.w,v29.w) +; CHECK-NEXT: v1.w = vsub(v31.w,v1.w) ; CHECK-NEXT: v4.w = vadd(v6.w,v4.w) -; CHECK-NEXT: v31 = vmux(q2,v9,v31) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v14.uw = vlsr(v8.uw,r3) -; CHECK-NEXT: v7.w = vadd(v26.w,v27.w) +; CHECK-NEXT: v12.uw = vlsr(v5.uw,r3) +; CHECK-NEXT: v3.w = vadd(v3.w,v10.w) ; CHECK-NEXT: v1.w = vadd(v1.w,v10.w) -; CHECK-NEXT: v3.w = vsub(v28.w,v3.w) +; CHECK-NEXT: q2 = vcmp.eq(v2.w,v9.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: r3 = #23 -; CHECK-NEXT: v12.uw = vlsr(v5.uw,r3) +; CHECK-NEXT: v14.uw = vlsr(v8.uw,r3) +; CHECK-NEXT: q3 = vcmp.eq(v12.w,v28.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v5.uw = vlsr(v28.uw,r2) ; CHECK-NEXT: q1 = vcmp.eq(v14.w,v6.w) -; CHECK-NEXT: v3.w = vadd(v3.w,v10.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v7.uw = vlsr(v7.uw,r2) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v4.uw = vlsr(v4.uw,r2) -; CHECK-NEXT: q3 = vcmp.eq(v12.w,v26.w) +; CHECK-NEXT: v5 = vmux(q3,v7,v5) +; CHECK-NEXT: q3 = vcmp.eq(v0.w,v9.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v6.uw = vlsr(v6.uw,r2) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v5.uw = vlsr(v26.uw,r2) -; CHECK-NEXT: v30 = vmux(q1,v4,v6) +; CHECK-NEXT: v3.w = vasl(v3.w,r3) +; CHECK-NEXT: v2 = vmux(q1,v4,v6) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v1.w = vasl(v1.w,r3) +; CHECK-NEXT: v3 = vor(v5,v3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v7.uw = vlsr(v7.uw,r2) -; CHECK-NEXT: v1 = vor(v30,v1) -; CHECK-NEXT: } -; CHECK-NEXT: { -; CHECK-NEXT: v30.qf32 = vadd(v31.sf,v9.sf) -; CHECK-NEXT: v5 = vmux(q3,v7,v5) -; CHECK-NEXT: q3 = vcmp.eq(v0.w,v9.w) +; CHECK-NEXT: v1 = vor(v2,v1) +; CHECK-NEXT: v3 = vmux(q2,v9,v3) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v3.w = vasl(v3.w,r3) ; CHECK-NEXT: v0 = vmux(q3,v9,v1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v3 = vor(v5,v3) +; CHECK-NEXT: v2.qf32 = vadd(v3.sf,v9.sf) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v31.qf32 = vadd(v0.sf,v9.sf) +; CHECK-NEXT: v3.qf32 = vadd(v0.sf,v9.sf) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.hf = v31:30.qf32 +; CHECK-NEXT: v0.hf = v3:2.qf32 ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v0.h = vdeal(v0.h) @@ -2511,7 +2513,6 @@ define void @u32f16_1(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: { ; CHECK-NEXT: v7 = vsplat(r4) ; CHECK-NEXT: r3 = #23 -; CHECK-NEXT: v31.qf32 = vadd(v2.sf,v2.sf) ; CHECK-NEXT: q2 = vcmp.eq(v0.w,v2.w) ; CHECK-NEXT: } ; CHECK-NEXT: { @@ -2533,33 +2534,34 @@ define void @u32f16_1(ptr %a0, ptr %a1) #0 { ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v1.w = vsub(v3.w,v1.w) -; CHECK-NEXT: v28.w = vadd(v4.w,v5.w) +; CHECK-NEXT: v30.w = vadd(v4.w,v5.w) ; CHECK-NEXT: q1 = vcmp.eq(v6.w,v4.w) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v29.uw = vlsr(v4.uw,r2) +; CHECK-NEXT: v31.uw = vlsr(v4.uw,r2) ; CHECK-NEXT: v1.w = vadd(v1.w,v7.w) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: r2 = #64 -; CHECK-NEXT: v3.uw = vlsr(v28.uw,r2) +; CHECK-NEXT: v3.uw = vlsr(v30.uw,r2) ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v1.w = vasl(v1.w,r3) ; CHECK-NEXT: q3 = vsetq(r2) -; CHECK-NEXT: v3 = vmux(q1,v3,v29) +; CHECK-NEXT: v3 = vmux(q1,v3,v31) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30 = vor(v3,v1) +; CHECK-NEXT: v1.qf32 = vadd(v2.sf,v2.sf) +; CHECK-NEXT: v0 = vor(v3,v1) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30 = vmux(q2,v2,v30) +; CHECK-NEXT: v0 = vmux(q2,v2,v0) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v30.qf32 = vadd(v30.sf,v2.sf) +; CHECK-NEXT: v0.qf32 = vadd(v0.sf,v2.sf) ; CHECK-NEXT: } ; CHECK-NEXT: { -; CHECK-NEXT: v0.hf = v31:30.qf32 +; CHECK-NEXT: v0.hf = v1:0.qf32 ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: v0.h = vdeal(v0.h) diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vmpy-parts.ll b/llvm/test/CodeGen/Hexagon/autohvx/vmpy-parts.ll index 1d51210519e57..e03bf942e44b3 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/vmpy-parts.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/vmpy-parts.ll @@ -342,13 +342,13 @@ define <64 x i32> @f10(<32 x i32> %a0, <32 x i32> %a1) #0 { ; V60-NEXT: v6.uw = vlsr(v2.uw,r2) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v31 = vmux(q1,v1,v4) +; V60-NEXT: v5 = vdelta(v1,v5) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v5 = vdelta(v1,v5) +; V60-NEXT: v1 = vmux(q1,v1,v4) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: if (q0) v31.w += v0.w +; V60-NEXT: if (q0) v1.w += v0.w ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v9:8.uw = vmpy(v0.uh,v5.uh) @@ -357,19 +357,19 @@ define <64 x i32> @f10(<32 x i32> %a0, <32 x i32> %a1) #0 { ; V60-NEXT: v9:8.w = vadd(v9.uh,v8.uh) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v30.w = vadd(v8.w,v6.w) +; V60-NEXT: v31.w = vadd(v8.w,v6.w) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v2.w += vasl(v8.w,r2) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v9.w += vasr(v30.w,r2) +; V60-NEXT: v9.w += vasr(v31.w,r2) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v0.w = vadd(v3.w,v9.w) ; V60-NEXT: } ; V60-NEXT: { -; V60-NEXT: v3.w = vsub(v0.w,v31.w) +; V60-NEXT: v3.w = vsub(v0.w,v1.w) ; V60-NEXT: } ; V60-NEXT: { ; V60-NEXT: v1:0 = vcombine(v3,v2) diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs.ll b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs.ll index 3cbf3d21dec5a..f927bdd5390f6 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs.ll @@ -35,8 +35,8 @@ define void @__int128_and_f(ptr noalias nocapture writeonly sret(i128) align 8 % ; Z15-LABEL: __int128_and_f: ; Z15: # %bb.0: # %entry ; Z15-NEXT: vl %v0, 0(%r3), 3 -; Z15-NEXT: vlr %v4, %v0 ; Z15-NEXT: vrepg %v6, %v0, 1 +; Z15-NEXT: vlr %v4, %v0 ; Z15-NEXT: #APP ; Z15-NEXT: #NO_APP ; Z15-NEXT: vmrhg %v0, %v4, %v6 @@ -260,8 +260,8 @@ entry: define <4 x i32> @vec128_and_f(<4 x i32> %cc_dep1) { ; CHECK-LABEL: vec128_and_f: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vlr %v1, %v24 ; CHECK-NEXT: vrepg %v3, %v24, 1 +; CHECK-NEXT: vlr %v1, %v24 ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vmrhg %v24, %v1, %v3 diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting.ll b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting.ll index 23d78a9315b40..b953fc31f3fce 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting.ll @@ -249,8 +249,8 @@ entry: define <4 x i32> @vec128_and_f(<4 x i32> %cc_dep1) { ; CHECK-LABEL: vec128_and_f: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vlr %v0, %v24 ; CHECK-NEXT: vrepg %v2, %v24, 1 +; CHECK-NEXT: vlr %v0, %v24 ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vmrhg %v24, %v0, %v2 diff --git a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll index 219541cffb940..2e75ce90eb48c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll @@ -47,8 +47,8 @@ define void @vldst4(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRo ; CHECK-NEXT: vmovx.f16 s8, s13 ; CHECK-NEXT: vins.f16 s20, s22 ; CHECK-NEXT: vins.f16 s16, s18 -; CHECK-NEXT: vmov.f32 s2, s5 ; CHECK-NEXT: vins.f16 s25, s6 +; CHECK-NEXT: vmov.f32 s2, s5 ; CHECK-NEXT: vmov.f32 s3, s17 ; CHECK-NEXT: vins.f16 s0, s15 ; CHECK-NEXT: vmovx.f16 s9, s21 diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll index 7d662d1f1a990..3f92152e18955 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -1358,8 +1358,8 @@ define void @vst3_v16f16(ptr %src, ptr %dst) { ; CHECK-NEXT: vldrw.u32 q3, [r0] ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmov.f32 s16, s12 -; CHECK-NEXT: vmov.f32 s0, s13 ; CHECK-NEXT: vins.f16 s16, s24 +; CHECK-NEXT: vmov.f32 s0, s13 ; CHECK-NEXT: vmov.16 q4[4], r2 ; CHECK-NEXT: vins.f16 s0, s25 ; CHECK-NEXT: vmov.f32 s19, s0