diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f4f16a2e0fb106..0d3857743cb335 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11052,23 +11052,17 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
                        DAG.getConstant(Imm, dl, MVT::i32));
   }
   case OP_VUZPL:
-    return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
-                       OpRHS);
+    return DAG.getNode(AArch64ISD::UZP1, dl, VT, OpLHS, OpRHS);
   case OP_VUZPR:
-    return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
-                       OpRHS);
+    return DAG.getNode(AArch64ISD::UZP2, dl, VT, OpLHS, OpRHS);
   case OP_VZIPL:
-    return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
-                       OpRHS);
+    return DAG.getNode(AArch64ISD::ZIP1, dl, VT, OpLHS, OpRHS);
   case OP_VZIPR:
-    return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
-                       OpRHS);
+    return DAG.getNode(AArch64ISD::ZIP2, dl, VT, OpLHS, OpRHS);
   case OP_VTRNL:
-    return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
-                       OpRHS);
+    return DAG.getNode(AArch64ISD::TRN1, dl, VT, OpLHS, OpRHS);
   case OP_VTRNR:
-    return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
-                       OpRHS);
+    return DAG.getNode(AArch64ISD::TRN2, dl, VT, OpLHS, OpRHS);
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index 02508c2dfdc691..05847394e0f96e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -274,3 +274,67 @@ define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
   %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
   ret <16 x i8> %3
 }
+
+define <4 x float> @shuffle_zip1(<4 x float> %arg) {
+; CHECK-LABEL: shuffle_zip1:
+; CHECK:       // %bb.0: // %bb
+; CHECK-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-NEXT:    fcmgt.4s v0, v0, v1
+; CHECK-NEXT:    uzp1.8h v1, v0, v0
+; CHECK-NEXT:    xtn.4h v0, v0
+; CHECK-NEXT:    xtn.4h v1, v1
+; CHECK-NEXT:    zip2.4h v0, v0, v1
+; CHECK-NEXT:    fmov.4s v1, #1.00000000
+; CHECK-NEXT:    zip1.4h v0, v0, v0
+; CHECK-NEXT:    sshll.4s v0, v0, #0
+; CHECK-NEXT:    and.16b v0, v1, v0
+; CHECK-NEXT:    ret
+bb:
+  %inst = fcmp olt <4 x float> zeroinitializer, %arg
+  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
+  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+  %inst3 = select <4 x i1> %inst2, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> zeroinitializer
+  ret <4 x float> %inst3
+}
+
+define <4 x i32> @shuffle_zip2(<4 x i32> %arg) {
+; CHECK-LABEL: shuffle_zip2:
+; CHECK:       // %bb.0: // %bb
+; CHECK-NEXT:    cmtst.4s v0, v0, v0
+; CHECK-NEXT:    uzp1.8h v1, v0, v0
+; CHECK-NEXT:    xtn.4h v0, v0
+; CHECK-NEXT:    xtn.4h v1, v1
+; CHECK-NEXT:    zip2.4h v0, v0, v1
+; CHECK-NEXT:    movi.4s v1, #1
+; CHECK-NEXT:    zip1.4h v0, v0, v0
+; CHECK-NEXT:    ushll.4s v0, v0, #0
+; CHECK-NEXT:    and.16b v0, v0, v1
+; CHECK-NEXT:    ret
+bb:
+  %inst = icmp ult <4 x i32> zeroinitializer, %arg
+  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
+  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+  %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
+  ret <4 x i32> %inst3
+}
+
+define <4 x i32> @shuffle_zip3(<4 x i32> %arg) {
+; CHECK-LABEL: shuffle_zip3:
+; CHECK:       // %bb.0: // %bb
+; CHECK-NEXT:    cmgt.4s v0, v0, #0
+; CHECK-NEXT:    uzp1.8h v1, v0, v0
+; CHECK-NEXT:    xtn.4h v0, v0
+; CHECK-NEXT:    xtn.4h v1, v1
+; CHECK-NEXT:    zip2.4h v0, v0, v1
+; CHECK-NEXT:    movi.4s v1, #1
+; CHECK-NEXT:    zip1.4h v0, v0, v0
+; CHECK-NEXT:    ushll.4s v0, v0, #0
+; CHECK-NEXT:    and.16b v0, v0, v1
+; CHECK-NEXT:    ret
+bb:
+  %inst = icmp slt <4 x i32> zeroinitializer, %arg
+  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
+  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+  %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
+  ret <4 x i32> %inst3
+}