[GlobalISel][AArch64] Combine Vector Reduction Add Long (#76241)
ADDLV(ADDLP) => ADDLV
Removes the unnecessary ADDLP instruction.
This combine already exists for SDAG; this patch adds it for GlobalISel.
chuongg3 committed Jan 22, 2024
1 parent a43c192 commit 50df08c
Showing 10 changed files with 938 additions and 312 deletions.
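
For context, a minimal sketch (not part of the patch) of the source-level shape this combine targets, written in C++ with ACLE NEON intrinsics; the function and variable names are illustrative only:

#include <arm_neon.h>
#include <cstdint>

// vpaddl_u8 is a pairwise widening add (UADDLP: v8i8 -> v4i16);
// vaddv_u16 is an across-lanes add reduction (ADDV) over the v4i16.
uint16_t sum_bytes(uint8x8_t v) {
  uint16x4_t pairs = vpaddl_u8(v);
  return vaddv_u16(pairs);
}

With this combine, the UADDLP plus the reduction fold into a single across-lanes long reduction, UADDLV.8B, under GlobalISel as they already did under SDAG.
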
15 changes: 15 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -227,6 +227,18 @@ def G_SMULL : AArch64GenericInstruction {
let hasSideEffects = 0;
}

def G_UADDLP : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
let hasSideEffects = 0;
}

def G_SADDLP : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
let hasSideEffects = 0;
}

def G_UADDLV : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
@@ -294,6 +306,9 @@ def : GINodeEquiv<G_BSP, AArch64bsp>;
def : GINodeEquiv<G_UMULL, AArch64umull>;
def : GINodeEquiv<G_SMULL, AArch64smull>;

def : GINodeEquiv<G_SADDLP, AArch64saddlp_n>;
def : GINodeEquiv<G_UADDLP, AArch64uaddlp_n>;

def : GINodeEquiv<G_SADDLV, AArch64saddlv>;
def : GINodeEquiv<G_UADDLV, AArch64uaddlv>;

23 changes: 23 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6664,6 +6664,26 @@ multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp>
defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;

// Patterns used for GlobalISel
multiclass SIMDAcrossLaneLongPairIntrinsicGISel<string Opc, SDPatternOperator addlp> {
// Patterns for addv(addlp(x)) ==> addlv
def : Pat<(i16 (vecreduce_add (v4i16 (addlp (v8i8 V64:$Rn))))),
(!cast<Instruction>(Opc#"v8i8v") V64:$Rn)>;
def : Pat<(i16 (vecreduce_add (v8i16 (addlp (v16i8 V128:$Rn))))),
(!cast<Instruction>(Opc#"v16i8v") V128:$Rn)>;
def : Pat<(i32 (vecreduce_add (v4i32 (addlp (v8i16 V128:$Rn))))),
(!cast<Instruction>(Opc#"v8i16v") V128:$Rn)>;

// Patterns for addp(addlp(x)) ==> addlv
def : Pat<(i32 (vecreduce_add (v2i32 (addlp (v4i16 V64:$Rn))))),
(!cast<Instruction>(Opc#"v4i16v") V64:$Rn)>;
def : Pat<(i64 (vecreduce_add (v2i64 (addlp (v4i32 V128:$Rn))))),
(!cast<Instruction>(Opc#"v4i32v") V128:$Rn)>;
}

defm : SIMDAcrossLaneLongPairIntrinsicGISel<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsicGISel<"SADDLV", AArch64saddlp>;

// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
(i64 (EXTRACT_SUBREG
@@ -6675,6 +6695,9 @@ def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op)))
(v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
ssub))>;

def : Pat<(v2i64 (AArch64uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
(v2i64 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub))>;

def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
(v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;
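
A similar hedged sketch (not from the patch) for the uaddlv(uaddlp(x)) patterns above, where the outer reduction is itself a widening one:

#include <arm_neon.h>
#include <cstdint>

// int_aarch64_neon_uaddlv on (v8i16 (uaddlp v16i8)) ==> single UADDLVv16i8v
uint32_t sum_bytes_q(uint8x16_t v) {
  uint16x8_t pairs = vpaddlq_u8(v); // UADDLP: v16i8 -> v8i16
  return vaddlvq_u16(pairs);        // UADDLV: widening across-lanes add
}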

51 changes: 51 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1451,6 +1451,57 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,

return true;
}
case Intrinsic::aarch64_neon_uaddlp:
case Intrinsic::aarch64_neon_saddlp: {
MachineIRBuilder MIB(MI);
MachineRegisterInfo &MRI = *MIB.getMRI();

unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
? AArch64::G_UADDLP
: AArch64::G_SADDLP;
MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
MI.eraseFromParent();

return true;
}
case Intrinsic::aarch64_neon_uaddlv:
case Intrinsic::aarch64_neon_saddlv: {
MachineIRBuilder MIB(MI);
MachineRegisterInfo &MRI = *MIB.getMRI();

unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
? AArch64::G_UADDLV
: AArch64::G_SADDLV;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(2).getReg();
LLT DstTy = MRI.getType(DstReg);

LLT MidTy, ExtTy;
if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
MidTy = LLT::fixed_vector(4, 32);
ExtTy = LLT::scalar(32);
} else {
MidTy = LLT::fixed_vector(2, 64);
ExtTy = LLT::scalar(64);
}

Register MidReg =
MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
Register ZeroReg =
MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
{MidReg, ZeroReg})
.getReg(0);

if (DstTy.getScalarSizeInBits() < 32)
MIB.buildTrunc(DstReg, ExtReg);
else
MIB.buildCopy(DstReg, ExtReg);

MI.eraseFromParent();

return true;
}
case Intrinsic::aarch64_neon_smax:
case Intrinsic::aarch64_neon_smin:
case Intrinsic::aarch64_neon_umax:
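
To see why the legalize-ctpop.mir checks below change: scalar popcount is legalized through a vector CNT followed by a uaddlv reduction, and after this patch that reduction is built as G_UADDLV plus G_EXTRACT_VECTOR_ELT rather than a raw G_INTRINSIC call. A hedged C++ illustration (not from the patch):

#include <cstdint>

int popcount64(uint64_t x) {
  // On AArch64 this typically selects to roughly:
  //   fmov d0, x0; cnt v0.8b, v0.8b; uaddlv h0, v0.8b; fmov w0, s0
  return __builtin_popcountll(x);
}
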
116 changes: 68 additions & 48 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
@@ -69,7 +69,10 @@ body: |
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %copy(s32)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[ZEXT]](s64)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
; CHECK-NEXT: %ctpop:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C]](s64)
; CHECK-NEXT: %ctpop:_(s32) = COPY [[EVEC]](s32)
; CHECK-NEXT: $w0 = COPY %ctpop(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
;
@@ -98,8 +101,11 @@ body: |
; CHECK-NEXT: %copy:_(s64) = COPY $x0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST %copy(s64)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
; CHECK-NEXT: %ctpop:_(s64) = G_ZEXT [[INT]](s32)
; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C]](s64)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
; CHECK-NEXT: %ctpop:_(s64) = G_ZEXT [[COPY]](s32)
; CHECK-NEXT: $x0 = COPY %ctpop(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
;
@@ -131,12 +137,14 @@ body: |
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[MV]](s128)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<16 x s8>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[INT]](s32), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C]](s64)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[C1]](s32)
; CHECK-NEXT: $x0 = COPY [[MV1]](s64)
; CHECK-NEXT: $x1 = COPY [[C1]](s64)
; CHECK-NEXT: $x1 = COPY [[C]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
;
; CHECK-CSSC-LABEL: name: s128_lower
@@ -177,9 +185,12 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C1]](s64)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
;
; CHECK-CSSC-LABEL: name: widen_s16
@@ -216,9 +227,12 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C1]](s64)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
;
; CHECK-CSSC-LABEL: name: widen_s8
Expand Down Expand Up @@ -255,9 +269,12 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C1]](s64)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
;
; CHECK-CSSC-LABEL: name: widen_s3
Expand Down Expand Up @@ -293,9 +310,12 @@ body: |
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C1]](s64)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
;
; CHECK-CSSC-LABEL: name: different_sizes
Expand Down Expand Up @@ -329,8 +349,8 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
; CHECK-NEXT: $q0 = COPY [[INT]](<8 x s16>)
; CHECK-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
; CHECK-NEXT: $q0 = COPY [[UADDLP]](<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
;
; CHECK-CSSC-LABEL: name: custom_8x16
@@ -339,8 +359,8 @@ body: |
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>)
; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
; CHECK-CSSC-NEXT: $q0 = COPY [[INT]](<8 x s16>)
; CHECK-CSSC-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
; CHECK-CSSC-NEXT: $q0 = COPY [[UADDLP]](<8 x s16>)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0
%0:_(<8 x s16>) = COPY $q0
%1:_(<8 x s16>) = G_CTPOP %0(<8 x s16>)
Expand All @@ -361,9 +381,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
; CHECK-NEXT: $q0 = COPY [[INT1]](<4 x s32>)
; CHECK-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
; CHECK-NEXT: [[UADDLP1:%[0-9]+]]:_(<4 x s32>) = G_UADDLP [[UADDLP]]
; CHECK-NEXT: $q0 = COPY [[UADDLP1]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
;
; CHECK-CSSC-LABEL: name: custom_4x32
@@ -372,9 +392,9 @@ body: |
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>)
; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
; CHECK-CSSC-NEXT: $q0 = COPY [[INT1]](<4 x s32>)
; CHECK-CSSC-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
; CHECK-CSSC-NEXT: [[UADDLP1:%[0-9]+]]:_(<4 x s32>) = G_UADDLP [[UADDLP]]
; CHECK-CSSC-NEXT: $q0 = COPY [[UADDLP1]](<4 x s32>)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = G_CTPOP %0(<4 x s32>)
Expand All @@ -395,10 +415,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
; CHECK-NEXT: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>)
; CHECK-NEXT: $q0 = COPY [[INT2]](<2 x s64>)
; CHECK-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
; CHECK-NEXT: [[UADDLP1:%[0-9]+]]:_(<4 x s32>) = G_UADDLP [[UADDLP]]
; CHECK-NEXT: [[UADDLP2:%[0-9]+]]:_(<2 x s64>) = G_UADDLP [[UADDLP1]]
; CHECK-NEXT: $q0 = COPY [[UADDLP2]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
;
; CHECK-CSSC-LABEL: name: custom_2x64
@@ -407,10 +427,10 @@ body: |
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
; CHECK-CSSC-NEXT: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>)
; CHECK-CSSC-NEXT: $q0 = COPY [[INT2]](<2 x s64>)
; CHECK-CSSC-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
; CHECK-CSSC-NEXT: [[UADDLP1:%[0-9]+]]:_(<4 x s32>) = G_UADDLP [[UADDLP]]
; CHECK-CSSC-NEXT: [[UADDLP2:%[0-9]+]]:_(<2 x s64>) = G_UADDLP [[UADDLP1]]
; CHECK-CSSC-NEXT: $q0 = COPY [[UADDLP2]](<2 x s64>)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = G_CTPOP %0(<2 x s64>)
Expand All @@ -431,8 +451,8 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
; CHECK-NEXT: $d0 = COPY [[INT]](<4 x s16>)
; CHECK-NEXT: [[UADDLP:%[0-9]+]]:_(<4 x s16>) = G_UADDLP [[CTPOP]]
; CHECK-NEXT: $d0 = COPY [[UADDLP]](<4 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
;
; CHECK-CSSC-LABEL: name: custom_4x16
@@ -441,8 +461,8 @@ body: |
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>)
; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
; CHECK-CSSC-NEXT: $d0 = COPY [[INT]](<4 x s16>)
; CHECK-CSSC-NEXT: [[UADDLP:%[0-9]+]]:_(<4 x s16>) = G_UADDLP [[CTPOP]]
; CHECK-CSSC-NEXT: $d0 = COPY [[UADDLP]](<4 x s16>)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $d0
%0:_(<4 x s16>) = COPY $d0
%1:_(<4 x s16>) = G_CTPOP %0(<4 x s16>)
Expand All @@ -463,9 +483,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>)
; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>)
; CHECK-NEXT: $d0 = COPY [[INT1]](<2 x s32>)
; CHECK-NEXT: [[UADDLP:%[0-9]+]]:_(<4 x s16>) = G_UADDLP [[CTPOP]]
; CHECK-NEXT: [[UADDLP1:%[0-9]+]]:_(<2 x s32>) = G_UADDLP [[UADDLP]]
; CHECK-NEXT: $d0 = COPY [[UADDLP1]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
;
; CHECK-CSSC-LABEL: name: custom_2x32
@@ -474,9 +494,9 @@ body: |
; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>)
; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>)
; CHECK-CSSC-NEXT: $d0 = COPY [[INT1]](<2 x s32>)
; CHECK-CSSC-NEXT: [[UADDLP:%[0-9]+]]:_(<4 x s16>) = G_UADDLP [[CTPOP]]
; CHECK-CSSC-NEXT: [[UADDLP1:%[0-9]+]]:_(<2 x s32>) = G_UADDLP [[UADDLP]]
; CHECK-CSSC-NEXT: $d0 = COPY [[UADDLP1]](<2 x s32>)
; CHECK-CSSC-NEXT: RET_ReallyLR implicit $d0
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = G_CTPOP %0(<2 x s32>)
