Skip to content

Commit

Permalink
[PowerPC] Fix 32bit vector insert instructions for ISA3.1
Browse files Browse the repository at this point in the history
The platform-independent ISD::INSERT_VECTOR_ELT takes an element index,
but the vins* instructions take a byte index. Update the 32-bit td patterns
for vector insert to convert the element index to a byte index accordingly.

Since vector inserts with a non-constant index are supported in
ISA 3.1, there is no need to use the platform-specific ISD node
PPCISD::VECINSERT. Update the td patterns to use
ISD::INSERT_VECTOR_ELT directly instead.

Reviewed By: nemanjai, #powerpc

Differential Revision: https://reviews.llvm.org/D113802
  • Loading branch information
lei137 committed Nov 15, 2021
1 parent 1ca00ec commit f50c6c1
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 130 deletions.
29 changes: 12 additions & 17 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -1247,9 +1247,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}

if (Subtarget.hasP9Altivec()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);

if (Subtarget.isISA3_1()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
} else {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
Expand All @@ -1258,9 +1265,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}

if (Subtarget.isISA3_1())
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
}

if (Subtarget.pairedVectorMemops()) {
Expand Down Expand Up @@ -10752,7 +10756,6 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDValue V3 = Op.getOperand(2);

if (VT == MVT::v2f64 && C)
return Op;
Expand All @@ -10761,18 +10764,10 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
return SDValue();
// On P10, we have legal lowering for constant and variable indices for
// integer vectors.
// all vectors.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64)
return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
// For f32 and f64 vectors, we have legal lowering for variable indices.
// For f32 we also have legal lowering when the element is loaded from
// memory.
if (VT == MVT::v4f32 || VT == MVT::v2f64) {
if (!C || (VT == MVT::v4f32 && isa<LoadSDNode>(V2)))
return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
return Op;
}
}

// Before P10, we have legal lowering for constant indices but not for
Expand Down
169 changes: 79 additions & 90 deletions llvm/lib/Target/PowerPC/PPCInstrPrefix.td
Expand Up @@ -29,9 +29,6 @@ def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [
def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
]>;
def SDT_PPCVecInsertElt : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<3>
]>;

//===----------------------------------------------------------------------===//
// ISA 3.1 specific PPCISD nodes.
Expand All @@ -45,7 +42,6 @@ def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
[]>;
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
def PPCvecinsertelt : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsertElt, []>;

//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -2797,135 +2793,128 @@ let Predicates = [PrefixInstrs] in {
}

def InsertEltShift {
dag Sub32Left0 = (EXTRACT_SUBREG $rB, sub_32);
dag Sub32 = (i32 (EXTRACT_SUBREG $rB, sub_32));
dag Sub32Left1 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 1, 0, 30);
dag Sub32Left2 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 2, 0, 29);
dag Left1 = (RLWINM $rB, 1, 0, 30);
dag Left2 = (RLWINM $rB, 2, 0, 29);
dag Left3 = (RLWINM8 $rB, 3, 0, 28);
}

let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
// Indexed vector insert element
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
(VINSBRX $vDi, InsertEltShift.Sub32Left0, $rA)>;
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)),
(VINSBRX $vDi, InsertEltShift.Sub32, $rA)>;
def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)),
(VINSHRX $vDi, InsertEltShift.Sub32Left1, $rA)>;
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, $rA)>;
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, $rA)>;

def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
(VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;

def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;

// Immediate vector insert element
foreach i = [0, 1, 2, 3] in {
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
(VINSW $vDi, !mul(!sub(3, i), 4), $rA)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
(VINSW $vDi, !mul(!sub(3, i), 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
(VINSW $vDi, !mul(!sub(3, i), 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
(VINSW $vDi, !mul(!sub(3, i), 4), (LWZX memrr:$rA))>;
let AddedComplexity = 400 in {
// Immediate vector insert element
foreach Idx = [0, 1, 2, 3] in {
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)),
(VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)),
(VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)),
(VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)),
(VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>;
}
foreach i = [0, 1] in
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))),
(VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
}
foreach i = [0, 1] in
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
(VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
}

let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
// Indexed vector insert element
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i32:$rB)),
def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i32:$rB)),
(VINSBLX $vDi, $rB, $rA)>;
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i32:$rB)),
(VINSHLX $vDi, $rB, $rA)>;
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i32:$rB)),
(VINSWLX $vDi, $rB, $rA)>;

def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i32:$rB)),
(VINSWLX $vDi, $rB, Bitcast.FltToInt)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
i32:$rB)),
(VINSWLX $vDi, $rB, (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
i32:$rB)),
(VINSWLX $vDi, $rB, (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
i32:$rB)),
(VINSWLX $vDi, $rB, (LWZX memrr:$rA))>;
def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i32:$rB)),
(VINSHLX $vDi, InsertEltShift.Left1, $rA)>;
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i32:$rB)),
(VINSWLX $vDi, InsertEltShift.Left2, $rA)>;

// Immediate vector insert element
foreach i = [0, 1, 2, 3] in {
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i32 i))),
(VINSW $vDi, !mul(i, 4), $rA)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
(i32 i))),
(VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
(i32 i))),
(VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
(i32 i))),
(VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
}
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)),
(VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)),
(VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)),
(VINSWLX v4f32:$vDi, InsertEltShift.Left2, (PLWZ memri34:$rA))>;
def: Pat<(v4f32(insertelt v4f32 : $vDi, (f32(load xaddr : $rA)), i32 : $rB)),
(VINSWLX v4f32 : $vDi, InsertEltShift.Left2, (LWZX memrr : $rA))>;
}

let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
// Indexed vector insert element
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
(VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>;
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)),
(VINSBLX $vDi, InsertEltShift.Sub32, $rA)>;
def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)),
(VINSHLX $vDi, InsertEltShift.Sub32Left1, $rA)>;
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, $rA)>;
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, $rA)>;

def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
(VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;

def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
}

let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
// Immediate vector insert element
foreach i = [0, 1, 2, 3] in {
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
(VINSW $vDi, !mul(i, 4), $rA)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
(VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
(VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
(VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
foreach Ty = [i32, i64] in {
foreach Idx = [0, 1, 2, 3] in {
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))),
(VINSW $vDi, !mul(Idx, 4), $rA)>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
(Ty Idx))),
(VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
(Ty Idx))),
(VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
(Ty Idx))),
(VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>;
}
}
foreach i = [0, 1] in
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
(VINSD $vDi, !mul(i, 8), $rA)>;

foreach Idx = [0, 1] in
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, Idx)),
(VINSD $vDi, !mul(Idx, 8), $rA)>;
}
33 changes: 20 additions & 13 deletions llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
Expand Up @@ -69,7 +69,8 @@ define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
;
; CHECK-32-P10-LABEL: testHalf:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: vinshlx 2, 6, 4
; CHECK-32-P10-NEXT: slwi 3, 6, 1
; CHECK-32-P10-NEXT: vinshlx 2, 3, 4
; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i16
Expand Down Expand Up @@ -106,7 +107,8 @@ define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
;
; CHECK-32-P10-LABEL: testWord:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: vinswlx 2, 6, 4
; CHECK-32-P10-NEXT: slwi 3, 6, 2
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i32
Expand Down Expand Up @@ -186,8 +188,10 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
; CHECK-32-P10-LABEL: testDoubleword:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: add 5, 6, 6
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
; CHECK-32-P10-NEXT: slwi 6, 5, 2
; CHECK-32-P10-NEXT: vinswlx 2, 6, 3
; CHECK-32-P10-NEXT: addi 3, 5, 1
; CHECK-32-P10-NEXT: slwi 3, 3, 2
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
; CHECK-32-P10-NEXT: blr
entry:
Expand Down Expand Up @@ -280,18 +284,17 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
;
; CHECK-64-P10-LABEL: testFloat1:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: xscvdpspn 0, 1
; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: mffprwz 3, 0
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
; CHECK-64-P10-NEXT: xscvdpspn 35, 1
; CHECK-64-P10-NEXT: extsw 3, 4
; CHECK-64-P10-NEXT: slwi 3, 3, 2
; CHECK-64-P10-NEXT: vinswvlx 2, 3, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testFloat1:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: xscvdpspn 0, 1
; CHECK-32-P10-NEXT: mffprwz 3, 0
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
; CHECK-32-P10-NEXT: xscvdpspn 35, 1
; CHECK-32-P10-NEXT: slwi 3, 4, 2
; CHECK-32-P10-NEXT: vinswvlx 2, 3, 3
; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <4 x float> %a, float %b, i32 %idx1
Expand Down Expand Up @@ -347,8 +350,10 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lwz 6, 0(3)
; CHECK-32-P10-NEXT: lwz 3, 1(3)
; CHECK-32-P10-NEXT: slwi 4, 4, 2
; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
; CHECK-32-P10-NEXT: slwi 4, 5, 2
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
; CHECK-32-P10-NEXT: blr
entry:
%0 = bitcast i8* %b to float*
Expand Down Expand Up @@ -415,10 +420,12 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
; CHECK-32-P10-LABEL: testFloat3:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lis 6, 1
; CHECK-32-P10-NEXT: slwi 4, 4, 2
; CHECK-32-P10-NEXT: lwzx 6, 3, 6
; CHECK-32-P10-NEXT: lwz 3, 0(3)
; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
; CHECK-32-P10-NEXT: slwi 4, 5, 2
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
Expand Down

0 comments on commit f50c6c1

Please sign in to comment.