diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index fb3e6482bb096..729a57ef23b1e 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -1203,6 +1203,18 @@ bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) { if (!NewSrc.Reg) continue; + if (NewSrc.SubReg) { + // Constrain the register class to support the subregister index, or + // skip this source. ARM's copy-like queries return register:subreg + // pairs whose register class does not directly support the index. + const TargetRegisterClass *RC = MRI->getRegClass(NewSrc.Reg); + const TargetRegisterClass *WithSubRC = + TRI->getSubClassWithSubReg(RC, NewSrc.SubReg); + if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC)) + continue; + Changed = true; + } + // Rewrite source. if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) { // We may have extended the live-range of NewSrc, account for that. @@ -1275,6 +1287,18 @@ MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); Register NewVReg = MRI->createVirtualRegister(DefRC); + if (NewSrc.SubReg) { + const TargetRegisterClass *NewSrcRC = MRI->getRegClass(NewSrc.Reg); + const TargetRegisterClass *WithSubRC = + TRI->getSubClassWithSubReg(NewSrcRC, NewSrc.SubReg); + + // The new source may not directly support the subregister, but we should be + // able to assume it is constrainable to support the subregister (otherwise + // ValueTracker was lying and reported a useless value). 
+ if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC)) + llvm_unreachable("replacement register cannot support subregister"); + } + MachineInstr *NewCopy = BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg) diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e94220af05a0d..2e8a676269a74 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -960,17 +960,3 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, } return false; } - -bool ARMBaseRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, - unsigned DefSubReg, - const TargetRegisterClass *SrcRC, - unsigned SrcSubReg) const { - // We can't extract an SPR from an arbitary DPR (as opposed to a DPR_VFP2). - if (DefRC == &ARM::SPRRegClass && DefSubReg == 0 && - SrcRC == &ARM::DPRRegClass && - (SrcSubReg == ARM::ssub_0 || SrcSubReg == ARM::ssub_1)) - return false; - - return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg, - SrcRC, SrcSubReg); -} diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 5b67b34089d7e..03b0fa0d1ee08 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -158,11 +158,6 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo { const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override; - bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, - unsigned DefSubReg, - const TargetRegisterClass *SrcRC, - unsigned SrcSubReg) const override; - int getSEHRegNum(unsigned i) const { return getEncodingValue(i); } }; diff --git a/llvm/test/CodeGen/ARM/issue159343.ll b/llvm/test/CodeGen/ARM/issue159343.ll new file mode 100644 index 0000000000000..03292582918a9 --- /dev/null +++ b/llvm/test/CodeGen/ARM/issue159343.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s | FileCheck %s + +; Make sure there's no assertion from peephole-opt introducing illegal +; subregister index uses. + +target triple = "thumbv7-unknown-linux-android29" + +define void @_ZN11VersionEdit10DecodeFromEv(i1 %call4, ptr %__profc__ZN11VersionEdit10DecodeFromEv) nounwind { +; CHECK-LABEL: _ZN11VersionEdit10DecodeFromEv: +; CHECK: @ %bb.0: @ %land.rhs.lr.ph +; CHECK-NEXT: lsls r0, r0, #31 +; CHECK-NEXT: beq .LBB0_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: adr r0, .LCPI0_0 +; CHECK-NEXT: vld1.64 {d0, d1}, [r0:128] +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: @ %select.false +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: .LBB0_3: @ %select.end +; CHECK-NEXT: vldr s5, .LCPI0_1 +; CHECK-NEXT: vldr s4, .LCPI0_2 +; CHECK-NEXT: vmov.f32 s6, s0 +; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vst1.64 {d2, d3}, [r1] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.4: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long 0x00000000 @ float 0 +; CHECK-NEXT: .LCPI0_2: +; CHECK-NEXT: .long 0x00000001 @ float 1.40129846E-45 +land.rhs.lr.ph: + br i1 %call4, label %sw.bb, label %while.cond.while.end_crit_edge.split.loop.exit43 + +while.cond.while.end_crit_edge.split.loop.exit43: ; preds = %land.rhs.lr.ph + %ext0 = extractelement <4 x i64> zeroinitializer, i64 0 + br label %while.cond.while.end_crit_edge + +while.cond.while.end_crit_edge: ; preds = %sw.bb, %while.cond.while.end_crit_edge.split.loop.exit43 + %pgocount5374.ph = phi i64 [ %ext1, %sw.bb ], [ %ext0, %while.cond.while.end_crit_edge.split.loop.exit43 ] + %ins = insertelement <2 x i64> splat (i64 1), i64 %pgocount5374.ph, i64 1 + store <2 x i64> %ins, ptr %__profc__ZN11VersionEdit10DecodeFromEv, align 8 + ret void + +sw.bb: ; preds = %land.rhs.lr.ph + %ext1 = extractelement <4 x i64> splat (i64 
1), i64 0 + br label %while.cond.while.end_crit_edge +} + diff --git a/llvm/test/CodeGen/ARM/pr159343.mir b/llvm/test/CodeGen/ARM/pr159343.mir new file mode 100644 index 0000000000000..9b71b1ad94b2f --- /dev/null +++ b/llvm/test/CodeGen/ARM/pr159343.mir @@ -0,0 +1,31 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -run-pass=peephole-opt -verify-machineinstrs -mtriple=thumbv7-unknown-linux-android29 %s -o - | FileCheck %s +--- +name: Test_shouldRewriteCopySrc_Invalid_SubReg +tracksRegLiveness: true +body: | + bb.1: + liveins: $r0, $r1 + + ; CHECK-LABEL: name: Test_shouldRewriteCopySrc_Invalid_SubReg + ; CHECK: liveins: $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:dpair = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr_vfp2 = COPY [[DEF]].dsub_0 + ; CHECK-NEXT: [[VMOVRRD:%[0-9]+]]:gpr, [[VMOVRRD1:%[0-9]+]]:gpr = VMOVRRD [[COPY]], 14 /* CC::al */, $noreg + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]].ssub_1 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:spr = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:spr = IMPLICIT_DEF + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:mqpr = REG_SEQUENCE killed [[DEF2]], %subreg.ssub_0, killed [[DEF1]], %subreg.ssub_1, killed [[DEF3]], %subreg.ssub_2, [[COPY]].ssub_1, %subreg.ssub_3 + ; CHECK-NEXT: VST1q64 $r1, 0, killed [[REG_SEQUENCE]], 14 /* CC::al */, $noreg + %0:dpair = IMPLICIT_DEF + %1:dpr = COPY %0.dsub_0 + %2:gpr, %3:gpr = VMOVRRD killed %1, 14 /* CC::al */, $noreg + %4:spr = VMOVSR killed %3, 14 /* CC::al */, $noreg + %5:spr = IMPLICIT_DEF + %6:spr = IMPLICIT_DEF + %7:spr = IMPLICIT_DEF + %8:mqpr = REG_SEQUENCE killed %6, %subreg.ssub_0, killed %5, %subreg.ssub_1, killed %7, %subreg.ssub_2, killed %4, %subreg.ssub_3 + VST1q64 $r1, 0, killed %8, 14 /* CC::al */, $noreg +... 
diff --git a/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll b/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll index e653aaa316fed..2bf8f29eccb40 100644 --- a/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll +++ b/llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll @@ -12,8 +12,8 @@ define float @shouldRewriteCopySrc(double %arg) #0 { ; CHECK-NEXT: @APP ; CHECK-NEXT: nop ; CHECK-NEXT: @NO_APP -; CHECK-NEXT: vmov r0, r1, d16 -; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vmov.f64 d0, d16 +; CHECK-NEXT: @ kill: def $s0 killed $s0 killed $d0 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr bb: