Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 27 additions & 22 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UNDEF, VT, Legal);

setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

setOperationAction(ISD::SETCC, VT, Legal);
Expand Down Expand Up @@ -421,12 +421,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
if (Subtarget.hasExtLSX()) {
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BITCAST);
}

// Set DAG combine for 'LASX' feature.

if (Subtarget.hasExtLASX())
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
}

// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget.getRegisterInfo());
Expand Down Expand Up @@ -2834,37 +2830,47 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDLoc DL(Op);
MVT GRLenVT = Subtarget.getGRLenVT();

assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");

if (isa<ConstantSDNode>(Idx))
return Op;

switch (VecTy.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("Unexpected type");
case MVT::v4f32:
case MVT::v2f64:
return Op;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v32i8:
case MVT::v16i16:
case MVT::v4i64:
case MVT::v4f64: {
// Extract the high-half subvector and place it in the low half of a new
// vector. It doesn't matter what the high half of the new vector is.
EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
SDValue VecHi =
DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
SDValue TmpVec =
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
VecHi, DAG.getConstant(0, DL, GRLenVT));
SDValue TmpVec;
if (VecTy.is256BitVector()) {
// Extract the high-half subvector and place it in the low half of a new
// vector. It doesn't matter what the high half of the new vector is.
EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
SDValue VecHi = DAG.getExtractSubvector(DL, HalfTy, Vec,
HalfTy.getVectorNumElements());
TmpVec =
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
VecHi, DAG.getConstant(0, DL, GRLenVT));
}

// Shuffle the original Vec and the TmpVec using MaskVec. The lowest element
// of MaskVec is Idx; the rest do not matter. ResVec[0] will hold the
// desired element.
SDValue IdxCp =
DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx);
SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
SDValue IdxVec =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL,
(VecTy.is128BitVector() ? MVT::v4f32 : MVT::v8f32), IdxCp);
SDValue MaskVec =
DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
SDValue ResVec =
DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
DAG.getBitcast(VecTy.changeVectorElementTypeToInteger(), IdxVec);
SDValue ResVec = DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec,
(VecTy.is128BitVector() ? Vec : TmpVec), Vec);

return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
DAG.getConstant(0, DL, GRLenVT));
Expand Down Expand Up @@ -6254,12 +6260,11 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,

MVT EltVT = N->getSimpleValueType(0);
SDValue Vec = N->getOperand(0);
EVT VecTy = Vec->getValueType(0);
SDValue Idx = N->getOperand(1);
unsigned IdxOp = Idx.getOpcode();
SDLoc DL(N);

if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
if (isa<ConstantSDNode>(Idx))
return SDValue();

// Combine:
Expand Down
18 changes: 0 additions & 18 deletions llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2086,24 +2086,6 @@ def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)),
(f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>;

// Vector extraction with variable index.
def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)),
(SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj,
i64:$rk),
sub_32)),
GPR), (i64 24))>;
def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)),
(SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj,
i64:$rk),
sub_32)),
GPR), (i64 16))>;
def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)),
(COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk),
sub_32)),
GPR)>;
def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
(COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
sub_64)),
GPR)>;
def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)),
(f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>;
def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
Expand Down
36 changes: 14 additions & 22 deletions llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,9 @@ define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_16xi8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: srai.w $a0, $a0, 24
; CHECK-NEXT: st.b $a0, $a1, 0
; CHECK-NEXT: movgr2fr.w $fa1, $a2
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <16 x i8>, ptr %src
%e = extractelement <16 x i8> %v, i32 %idx
Expand All @@ -93,11 +91,9 @@ define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_8xi16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: srai.w $a0, $a0, 16
; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: movgr2fr.w $fa1, $a2
; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
; CHECK-NEXT: vstelm.h $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <8 x i16>, ptr %src
%e = extractelement <8 x i16> %v, i32 %idx
Expand All @@ -109,10 +105,9 @@ define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_4xi32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: st.w $a0, $a1, 0
; CHECK-NEXT: movgr2fr.w $fa1, $a2
; CHECK-NEXT: vshuf.w $vr1, $vr0, $vr0
; CHECK-NEXT: vstelm.w $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <4 x i32>, ptr %src
%e = extractelement <4 x i32> %v, i32 %idx
Expand All @@ -124,10 +119,9 @@ define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_2xi64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0
; CHECK-NEXT: movfr2gr.d $a0, $fa0
; CHECK-NEXT: st.d $a0, $a1, 0
; CHECK-NEXT: movgr2fr.w $fa1, $a2
; CHECK-NEXT: vshuf.d $vr1, $vr0, $vr0
; CHECK-NEXT: vstelm.d $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <2 x i64>, ptr %src
%e = extractelement <2 x i64> %v, i32 %idx
Expand All @@ -139,8 +133,7 @@ define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_4xfloat_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0
; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2
; CHECK-NEXT: fst.s $fa0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <4 x float>, ptr %src
Expand All @@ -153,8 +146,7 @@ define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_2xdouble_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0
; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2
; CHECK-NEXT: fst.d $fa0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <2 x double>, ptr %src
Expand Down