Skip to content

Commit

Permalink
[ARM][NEON] Add constraint to vld2 Odd/Even Pseudo instructions. (#79287
Browse files Browse the repository at this point in the history
)

This ensures the odd/even pseudo instructions are allocated to the same
register range.

This fixes #71763
  • Loading branch information
AlfieRichardsArm committed Jan 31, 2024
1 parent c2675ba commit de75e50
Show file tree
Hide file tree
Showing 6 changed files with 321 additions and 83 deletions.
9 changes: 3 additions & 6 deletions llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -640,12 +640,9 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
// has an extra operand that is a use of the super-register. Record the
// operand index and skip over it.
unsigned SrcOpIdx = 0;
if (!IsVLD2DUP) {
if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
RegSpc == SingleHighTSpc)
SrcOpIdx = OpIdx++;
}
if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc || RegSpc == SingleLowSpc ||
RegSpc == SingleHighQSpc || RegSpc == SingleHighTSpc)
SrcOpIdx = OpIdx++;

// Copy the predicate operands.
MIB.add(MI.getOperand(OpIdx++));
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3032,11 +3032,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
}
if (is64BitVector || NumVecs == 1) {
// Double registers and VLD1 quad registers are directly supported.
} else if (NumVecs == 2) {
const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
MVT::Other, OpsA);
Chain = SDValue(VLdA, 1);
} else {
SDValue ImplDef = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
Expand Down
38 changes: 26 additions & 12 deletions llvm/lib/Target/ARM/ARMInstrNEON.td
Original file line number Diff line number Diff line change
Expand Up @@ -1491,12 +1491,26 @@ def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
addrmode6dupalign64>;

def VLD2DUPq8EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
// Duplicate of VLDQQPseudo but with a constraint variable
// to ensure the odd and even lanes use the same register range
class VLDQQPseudoInputDST<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr, QQPR: $src), itin,
"$src = $dst">;
class VLDQQWBPseudoInputDST<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR: $src), itin,
"$addr.addr = $wb, $src = $dst">;
class VLDQQWBfixedPseudoInputDST<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, QQPR: $src), itin,
"$addr.addr = $wb, $src = $dst">;

def VLD2DUPq8EvenPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo : VLDQQPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
Expand Down Expand Up @@ -1534,12 +1548,12 @@ defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
addrmode6dupalign64>;

def VLD2DUPq8OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudoWB_fixed : VLDQQWBfixedPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudoWB_fixed : VLDQQWBfixedPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudoWB_fixed : VLDQQWBfixedPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudoWB_register : VLDQQWBPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudoInputDST<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
Expand Down

0 comments on commit de75e50

Please sign in to comment.