diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 1b25da8833e4f..c3a23ea0ad373 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -5421,14 +5421,22 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_FPTOUI: case TargetOpcode::G_SITOFP: case TargetOpcode::G_UITOFP: { - if (TypeIdx != 0) - return UnableToLegalize; Observer.changingInstr(MI); - LLT SrcTy = LLT::fixed_vector( - MoreTy.getNumElements(), - MRI.getType(MI.getOperand(1).getReg()).getElementType()); - moreElementsVectorSrc(MI, SrcTy, 1); - moreElementsVectorDst(MI, MoreTy, 0); + LLT SrcExtTy; + LLT DstExtTy; + if (TypeIdx == 0) { + DstExtTy = MoreTy; + SrcExtTy = LLT::fixed_vector( + MoreTy.getNumElements(), + MRI.getType(MI.getOperand(1).getReg()).getElementType()); + } else { + DstExtTy = LLT::fixed_vector( + MoreTy.getNumElements(), + MRI.getType(MI.getOperand(0).getReg()).getElementType()); + SrcExtTy = MoreTy; + } + moreElementsVectorSrc(MI, SrcExtTy, 1); + moreElementsVectorDst(MI, DstExtTy, 0); Observer.changedInstr(MI); return Legalized; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 34e2c1d9c8e2f..33dba6a5c61ea 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -611,7 +611,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) Query.Types[0].isVector() && (Query.Types[1].getScalarSizeInBits() == 8 || Query.Types[1].getScalarSizeInBits() == 16); - }); + }) + .clampMinNumElements(1, s8, 8) + .clampMinNumElements(1, s16, 4); getActionDefinitionsBuilder(G_TRUNC) .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}}) @@ -630,7 +632,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) }) .clampMinNumElements(0, s8, 8) .clampMinNumElements(0, s16, 4) - .clampMinNumElements(0, s32, 2) .alwaysLegal(); getActionDefinitionsBuilder(G_SEXT_INREG) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir index c9556e27c6349..a63d8b9c13772 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir @@ -121,10 +121,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $h1 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY1]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[DEF]](s16), [[DEF]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[ANYEXT]](<4 x s32>) + ; CHECK-NEXT: $d0 = COPY [[UV]](<2 x s32>) ; CHECK-NEXT: RET_ReallyLR %0:_(s16) = COPY $h0 %1:_(s16) = COPY $h1 @@ -141,8 +142,8 @@ body: | ; CHECK-LABEL: name: widen_v2s8 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: %3:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: $d0 = COPY %3(<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32) + ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: RET_ReallyLR %0:_(s8) = G_IMPLICIT_DEF %1:_(s8) = G_IMPLICIT_DEF @@ -157,12 +158,14 @@ name: widen_v4s8 body: | bb.0: ; CHECK-LABEL: name: widen_v4s8 - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[DEF]](s16) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[DEF]](s16) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[DEF]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[COPY2]](s16), [[DEF]](s16) - ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF1]](s8), [[DEF2]](s8), [[DEF3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) + ; CHECK-NEXT: $d0 = COPY [[UV]](<4 x s16>) ; CHECK-NEXT: RET_ReallyLR %0:_(s8) = G_IMPLICIT_DEF %1:_(s8) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir index e12353c7ef5be..d3db2432e84cb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir @@ -235,31 +235,32 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY3]](s32) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[DEF2]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[C2]](s16), [[C1]](s64) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[UV]], [[C2]](s16), [[C1]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<16 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, undef, undef, undef, undef) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[SHUF]](<16 x s8>) ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<4 x s32>) = G_UITOFP [[BITCAST]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UITOFP]](<4 x s32>) - ; CHECK-NEXT: G_STORE [[UV4]](s32), [[COPY]](p0) :: (store (s32), align 16) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UITOFP]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[UV6]](s32), [[COPY]](p0) :: (store (s32), align 16) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4) + ; CHECK-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 8, align 8) + ; CHECK-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 8, align 8) ; CHECK-NEXT: G_BR %bb.1 bb.1: liveins: $w1, $w2, $w3, $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir index 63a26dcfea476..e49a94c12ed46 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir @@ -293,41 +293,44 @@ body: | ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %w0(s32), [[C]] ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP2]], 1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[COPY2]](s16), [[DEF1]](s16) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[TRUNC]](s16), [[C1]](s64) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[UV]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<8 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s8>), [[UV5:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C2]](s16) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C2]](s16) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[C2]](s16) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY3]](s16), [[COPY4]](s16), [[COPY5]](s16), [[C2]](s16) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV6]](<4 x s8>) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s16>), [[UV13:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>) + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<4 x s16>), [[UV15:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV12]], [[UV14]] ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC5]], [[ANYEXT1]] + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV6]](<4 x s8>) + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV19]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR5]](<8 x s8>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<4 x s16>), [[UV21:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT3]](<8 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC5]], [[UV20]] ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC6]], [[XOR]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]] - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) - ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT2]], [[BUILD_VECTOR4]] + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) + ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT4]], [[BUILD_VECTOR6]] ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %w0:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll index 42a8f51002f20..f7efaeaa50705 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll @@ -1,16 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -global-isel-abort=2 -global-isel -o - %s | FileCheck %s +; RUN: llc -global-isel -o - %s | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-macosx11.0.0" declare i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32>) #0 -; This test currently falls back but ensures we don't crash. - define i32 @bar() { ; CHECK-LABEL: bar: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: movi.2d v0, #0000000000000000 +; CHECK-NEXT: mov b1, v0[1] +; CHECK-NEXT: mov b2, v0[2] +; CHECK-NEXT: mov b3, v0[3] +; CHECK-NEXT: mov.h v0[1], v1[0] +; CHECK-NEXT: mov.h v2[1], v3[0] +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll.4s v1, v2, #0 +; CHECK-NEXT: mov.d v0[1], v1[0] +; CHECK-NEXT: movi.4s v1, #1 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: addv.4s s0, v0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir index ed40a2ff7ea70..e729f027baa71 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir @@ -541,17 +541,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s8>), [[UV3:%[0-9]+]]:_(<2 x s8>), [[UV4:%[0-9]+]]:_(<2 x s8>), [[UV5:%[0-9]+]]:_(<2 x s8>) = G_UNMERGE_VALUES [[TRUNC2]](<8 x s8>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s8>) = G_CONCAT_VECTORS [[UV2]](<2 x s8>), [[UV2]](<2 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[CONCAT_VECTORS1]](<4 x s8>) - ; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC]](s8), [[TRUNC1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) + ; CHECK-NEXT: $d0 = COPY [[UV2]](<4 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(<2 x s32>) = COPY $d0 %1:_(<2 x s8>) = G_TRUNC %0(<2 x s32>) diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index dbc5417e23133..61a4f64ac2bfc 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -3,8 +3,7 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for smull_zext_v4i16_v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1 +; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl2_v8i16_uzp1 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl2_v8i16_uzp1 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl2_v4i32_uzp1 @@ -189,13 +188,49 @@ define <8 x i32> @smull_zext_v8i8_v8i32_top_bit_is_1(ptr %A, ptr %B) nounwind { } define <4 x i32> @smull_zext_v4i16_v4i32(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: smull_zext_v4i16_v4i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: smull_zext_v4i16_v4i32: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr s0, [x0] +; CHECK-NEON-NEXT: ldr d1, [x1] +; CHECK-NEON-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEON-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: smull_zext_v4i16_v4i32: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr s0, [x0] +; CHECK-SVE-NEXT: ldr d1, [x1] +; CHECK-SVE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SVE-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: smull_zext_v4i16_v4i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: uxtb w8, w8 +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: fmov w11, s3 +; CHECK-GI-NEXT: uxtb w9, w9 +; CHECK-GI-NEXT: uxtb w10, w10 +; CHECK-GI-NEXT: uxtb w11, w11 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: fmov s2, w10 +; CHECK-GI-NEXT: fmov s3, w11 +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0 +; CHECK-GI-NEXT: ldr d2, [x1] +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0 +; CHECK-GI-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %load.A = load <4 x i8>, ptr %A %load.B = load <4 x i16>, ptr %B %zext.A = zext <4 x i8> %load.A to <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll index a1e0693685272..bc399c8d4ff07 100644 --- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll +++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-SDAG -; RUN: llc < %s -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-GISEL +; RUN: llc < %s -global-isel -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-GISEL define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) { ; CHECK-SDAG-LABEL: test_varidx_extract_v8s8: @@ -29,20 +29,20 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) { ; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 ; CHECK-GISEL-NEXT: mov w9, w0 ; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GISEL-NEXT: mov b1, v0.b[1] ; CHECK-GISEL-NEXT: add x8, sp, #8 -; CHECK-GISEL-NEXT: str d0, [sp, #8] ; CHECK-GISEL-NEXT: and x9, x9, #0x7 -; CHECK-GISEL-NEXT: mov b2, v0.b[1] +; CHECK-GISEL-NEXT: str d0, [sp, #8] ; CHECK-GISEL-NEXT: mov b3, v0.b[2] ; CHECK-GISEL-NEXT: lsl x10, x9, #1 ; CHECK-GISEL-NEXT: mov b0, v0.b[3] ; CHECK-GISEL-NEXT: sub x9, x10, x9 -; CHECK-GISEL-NEXT: ldrb w8, [x8, x9] -; CHECK-GISEL-NEXT: fmov s1, w8 -; CHECK-GISEL-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GISEL-NEXT: mov v1.h[2], v3.h[0] -; CHECK-GISEL-NEXT: mov v1.h[3], v0.h[0] -; CHECK-GISEL-NEXT: fmov d0, d1 +; CHECK-GISEL-NEXT: ldr b2, [x8, x9] +; CHECK-GISEL-NEXT: mov v2.b[1], v1.b[0] +; CHECK-GISEL-NEXT: mov v2.b[2], v3.b[0] +; CHECK-GISEL-NEXT: mov v2.b[3], v0.b[0] +; CHECK-GISEL-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GISEL-NEXT: add sp, sp, #16 ; CHECK-GISEL-NEXT: ret %tmp = extractelement <8 x i8> %x, i32 %idx @@ -176,17 +176,15 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) { ; CHECK-GISEL: // %bb.0: ; CHECK-GISEL-NEXT: sub sp, sp, #16 ; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GISEL-NEXT: mov w9, w0 -; CHECK-GISEL-NEXT: mov w8, #2 // =0x2 -; CHECK-GISEL-NEXT: add x10, sp, #8 -; CHECK-GISEL-NEXT: and x9, x9, #0x3 ; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: mov h1, v0.h[1] +; CHECK-GISEL-NEXT: add x8, sp, #8 ; CHECK-GISEL-NEXT: str d0, [sp, #8] -; CHECK-GISEL-NEXT: madd x8, x9, x8, x10 -; CHECK-GISEL-NEXT: umov w9, v0.h[1] -; CHECK-GISEL-NEXT: fmov s1, w9 -; CHECK-GISEL-NEXT: ldr h0, [x8] -; CHECK-GISEL-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GISEL-NEXT: and x9, x9, #0x3 +; CHECK-GISEL-NEXT: ldr h0, [x8, x9, lsl #1] +; CHECK-GISEL-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GISEL-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GISEL-NEXT: add sp, sp, #16 ; CHECK-GISEL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll index b7c02d6855a9e..e0851fd8739ec 100644 --- a/llvm/test/CodeGen/AArch64/bitcast.ll +++ b/llvm/test/CodeGen/AArch64/bitcast.ll @@ -1,14 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined. -; CHECK-GI: warning: Instruction selection used fallback path for bitcast_i32_v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16 - define <4 x i16> @foo1(<2 x i32> %a) { ; CHECK-SD-LABEL: foo1: ; CHECK-SD: // %bb.0: @@ -80,12 +75,26 @@ define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){ } define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){ -; CHECK-LABEL: bitcast_i32_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: zip1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: bitcast_i32_v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: add w8, w0, w1 +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: bitcast_i32_v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, w1 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v3.b[0] +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %c = add i32 %a, %b %d = bitcast i32 %c to <4 x i8> ret <4 x i8> %d @@ -119,13 +128,23 @@ define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){ } define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){ -; CHECK-LABEL: bitcast_i32_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w1 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: bitcast_i32_v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: add w8, w0, w1 +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: bitcast_i32_v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add w8, w0, w1 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %c = add i32 %a, %b %d = bitcast i32 %c to <2 x i16> ret <2 x i16> %d @@ -382,40 +401,72 @@ define <8 x i16> @bitcast_v16i8_v8i16(<16 x i8> %a, <16 x i8> %b){ ; ===== Smaller/Larger Width Vectors with Legal Element Sizes ===== define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){ -; CHECK-LABEL: bitcast_v2i16_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [sp, #12] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: ldr s0, [sp, #12] -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: bitcast_v2i16_v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [sp, #12] +; CHECK-SD-NEXT: strh w8, [sp, #14] +; CHECK-SD-NEXT: ldr s0, [sp, #12] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: bitcast_v2i16_v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v3.b[0] +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %c = add <2 x i16> %a, %b %d = bitcast <2 x i16> %c to <4 x i8> ret <4 x i8> %d } define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){ -; CHECK-LABEL: bitcast_v4i8_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: add x8, sp, #12 -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [sp, #12] -; CHECK-NEXT: ld1 { v0.h }[0], [x8] -; CHECK-NEXT: orr x8, x8, #0x2 -; CHECK-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: bitcast_v4i8_v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: add x8, sp, #12 +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [sp, #12] +; CHECK-SD-NEXT: ld1 { v0.h }[0], [x8] +; CHECK-SD-NEXT: orr x8, x8, #0x2 +; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: bitcast_v4i8_v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov h3, v0.h[3] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %c = add <4 x i8> %a, %b %d = bitcast <4 x i8> %c to <2 x i16> ret <2 x i16> %d diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll index f4221accfcbc5..071613b9cc011 100644 --- a/llvm/test/CodeGen/AArch64/bswap.ll +++ b/llvm/test/CodeGen/AArch64/bswap.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for bswap_v2i16 +; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; ====== Scalar Tests ===== define i16 @bswap_i16(i16 %a){ @@ -103,11 +101,23 @@ declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) ; ===== Smaller/Larger Width Vectors with Legal Element Sizes ===== define <2 x i16> @bswap_v2i16(<2 x i16> %a){ -; CHECK-LABEL: bswap_v2i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: rev32 v0.8b, v0.8b -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: bswap_v2i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: rev32 v0.8b, v0.8b +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: bswap_v2i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: rev16 v0.8b, v0.8b +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret entry: %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a) ret <2 x i16> %res diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index 7af01b53dae7e..01585d02adcb1 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -1,13 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 -; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 - -; CHECK-GI-FP16: warning: Instruction selection used fallback path for fptos_v2f16_v2i16 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fptou_v2f16_v2i16 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fptos_v2f16_v2i8 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fptou_v2f16_v2i8 +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 +; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 define i64 @fptos_f64_i64(double %a) { ; CHECK-LABEL: fptos_f64_i64: @@ -5184,8 +5179,13 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) { ; ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-FP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: @@ -5212,8 +5212,13 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) { ; ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-FP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: @@ -5658,8 +5663,13 @@ define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) { ; ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i8: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-FP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: @@ -5686,8 +5696,13 @@ define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) { ; ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i8: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-FP16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index ceac37f91238a..f5a7b5dc9f492 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -4,13 +4,6 @@ ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 -; CHECK-GI: warning: Instruction selection used fallback path for stofp_v3i8_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i8_v3f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3f32 -; CHECK-GI-NOFP16-NEXT: warning: Instruction selection used fallback path for stofp_v3i8_v3f16 -; CHECK-GI-NOFP16-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3f16 - define double @stofp_i64_f64(i64 %a) { ; CHECK-LABEL: stofp_i64_f64: ; CHECK: // %bb.0: // %entry @@ -1754,47 +1747,109 @@ entry: } define <3 x double> @stofp_v3i8_v3f64(<3 x i8> %a) { -; CHECK-LABEL: stofp_v3i8_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: fmov s1, w2 -; CHECK-NEXT: mov v0.s[1], w1 -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: sshr v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sshll v1.2d, v1.2s, #0 -; CHECK-NEXT: sshr v0.2s, v0.2s, #24 -; CHECK-NEXT: scvtf v2.2d, v1.2d -; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: scvtf v0.2d, v0.2d -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: stofp_v3i8_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: fmov s1, w2 +; CHECK-SD-NEXT: mov v0.s[1], w1 +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0 +; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: scvtf v2.2d, v1.2d +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: scvtf v0.2d, v0.2d +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: stofp_v3i8_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: fmov s1, w1 +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: fmov s1, w2 +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-GI-NEXT: mov h2, v0.h[1] +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NEXT: mov h2, v1.h[1] +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NEXT: smov x8, v0.s[0] +; CHECK-GI-NEXT: smov x9, v0.s[1] +; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: smov x8, v0.s[0] +; CHECK-GI-NEXT: mov v1.d[1], x9 +; CHECK-GI-NEXT: smov x9, v0.s[1] +; CHECK-GI-NEXT: fmov d2, x8 +; CHECK-GI-NEXT: scvtf v0.2d, v1.2d +; CHECK-GI-NEXT: mov v2.d[1], x9 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: scvtf v2.2d, v2.2d +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-GI-NEXT: ret entry: %c = sitofp <3 x i8> %a to <3 x double> ret <3 x double> %c } define <3 x double> @utofp_v3i8_v3f64(<3 x i8> %a) { -; CHECK-LABEL: utofp_v3i8_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: movi d1, #0x0000ff000000ff -; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: mov v0.s[1], w1 -; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v1.8b, v2.8b, v1.8b -; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: ucvtf v0.2d, v0.2d -; CHECK-NEXT: ucvtf v2.2d, v1.2d -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: utofp_v3i8_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff +; CHECK-SD-NEXT: fmov s2, w2 +; CHECK-SD-NEXT: mov v0.s[1], w1 +; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: and v1.8b, v2.8b, v1.8b +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d +; CHECK-SD-NEXT: ucvtf v2.2d, v1.2d +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: utofp_v3i8_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: fmov s1, w1 +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: fmov s1, w2 +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: mov h2, v0.h[1] +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NEXT: mov h2, v1.h[1] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NEXT: mov w8, v0.s[0] +; CHECK-GI-NEXT: mov w9, v0.s[1] +; CHECK-GI-NEXT: ushll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: mov w8, v0.s[0] +; CHECK-GI-NEXT: mov v1.d[1], x9 +; CHECK-GI-NEXT: mov w9, v0.s[1] +; CHECK-GI-NEXT: fmov d2, x8 +; CHECK-GI-NEXT: ucvtf v0.2d, v1.2d +; CHECK-GI-NEXT: mov v2.d[1], x9 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ucvtf v2.2d, v2.2d +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-GI-NEXT: ret entry: %c = uitofp <3 x i8> %a to <3 x double> ret <3 x double> %c @@ -3372,31 +3427,71 @@ entry: } define <3 x float> @stofp_v3i8_v3f32(<3 x i8> %a) { -; CHECK-LABEL: stofp_v3i8_v3f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: mov v0.h[1], w1 -; CHECK-NEXT: mov v0.h[2], w2 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: scvtf v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: stofp_v3i8_v3f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: mov v0.h[1], w1 +; CHECK-SD-NEXT: mov v0.h[2], w2 +; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: scvtf v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: stofp_v3i8_v3f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: fmov s1, w1 +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: fmov s1, w2 +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov h2, v0.h[1] +; CHECK-GI-NEXT: mov h3, v1.h[1] +; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: scvtf v0.4s, v0.4s +; CHECK-GI-NEXT: ret entry: %c = sitofp <3 x i8> %a to <3 x float> ret <3 x float> %c } define <3 x float> @utofp_v3i8_v3f32(<3 x i8> %a) { -; CHECK-LABEL: utofp_v3i8_v3f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: mov v0.h[1], w1 -; CHECK-NEXT: mov v0.h[2], w2 -; CHECK-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ucvtf v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: utofp_v3i8_v3f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: mov v0.h[1], w1 +; CHECK-SD-NEXT: mov v0.h[2], w2 +; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ucvtf v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: utofp_v3i8_v3f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: fmov s1, w1 +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: fmov s1, w2 +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov h2, v0.h[1] +; CHECK-GI-NEXT: mov h3, v1.h[1] +; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: ucvtf v0.4s, v0.4s +; CHECK-GI-NEXT: ret entry: %c = uitofp <3 x i8> %a to <3 x float> ret <3 x float> %c @@ -5582,12 +5677,18 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-FP16-LABEL: utofp_v2i8_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov w8, v0.s[1] -; CHECK-GI-FP16-NEXT: fmov w9, s0 -; CHECK-GI-FP16-NEXT: and w9, w9, #0xff -; CHECK-GI-FP16-NEXT: and w8, w8, #0xff -; CHECK-GI-FP16-NEXT: ucvtf h0, w9 -; CHECK-GI-FP16-NEXT: ucvtf h1, w8 +; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] +; CHECK-GI-FP16-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-FP16-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff +; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] ; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret @@ -5622,11 +5723,20 @@ define <3 x half> @stofp_v3i8_v3f16(<3 x i8> %a) { ; CHECK-GI-NOFP16-LABEL: stofp_v3i8_v3f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: fmov s0, w0 -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w1 -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w2 +; CHECK-GI-NOFP16-NEXT: fmov s1, w1 +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NOFP16-NEXT: fmov s1, w2 +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[0] ; CHECK-GI-NOFP16-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-NOFP16-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NOFP16-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NOFP16-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NOFP16-NEXT: scvtf v0.4s, v0.4s ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: ret @@ -5635,11 +5745,10 @@ define <3 x half> @stofp_v3i8_v3f16(<3 x i8> %a) { ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: fmov s0, w0 ; CHECK-GI-FP16-NEXT: fmov s1, w1 -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.b[1], v1.b[0] ; CHECK-GI-FP16-NEXT: fmov s1, w2 -; CHECK-GI-FP16-NEXT: mov v0.h[2], v1.h[0] -; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-GI-FP16-NEXT: mov v0.b[2], v1.b[0] +; CHECK-GI-FP16-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: ret entry: @@ -5671,10 +5780,20 @@ define <3 x half> @utofp_v3i8_v3f16(<3 x i8> %a) { ; CHECK-GI-NOFP16-LABEL: utofp_v3i8_v3f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: fmov s0, w0 -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w1 -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w2 -; CHECK-GI-NOFP16-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-GI-NOFP16-NEXT: fmov s1, w1 +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NOFP16-NEXT: fmov s1, w2 +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NOFP16-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-GI-NOFP16-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] ; CHECK-GI-NOFP16-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NOFP16-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NOFP16-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NOFP16-NEXT: ucvtf v0.4s, v0.4s ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: ret @@ -5683,11 +5802,10 @@ define <3 x half> @utofp_v3i8_v3f16(<3 x i8> %a) { ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: fmov s0, w0 ; CHECK-GI-FP16-NEXT: fmov s1, w1 -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.b[1], v1.b[0] ; CHECK-GI-FP16-NEXT: fmov s1, w2 -; CHECK-GI-FP16-NEXT: mov v0.h[2], v1.h[0] -; CHECK-GI-FP16-NEXT: movi d1, #0xff00ff00ff00ff -; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-FP16-NEXT: mov v0.b[2], v1.b[0] +; CHECK-GI-FP16-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll index 39143e5c53ffd..c3c0ec5e3d9d8 100644 --- a/llvm/test/CodeGen/AArch64/load.ll +++ b/llvm/test/CodeGen/AArch64/load.ll @@ -159,7 +159,8 @@ define <2 x i16> @load_v2i16(ptr %ptr){ ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr h0, [x0] ; CHECK-GI-NEXT: ldr h1, [x0, #2] -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = load <2 x i16>, ptr %ptr diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 01620652301ed..57f220f621cf8 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1117,8 +1117,15 @@ define <4 x i16> @vselect_constant_cond_zero_v4i16(<4 x i16> %a) { ; ; CHECK-GI-LABEL: vselect_constant_cond_zero_v4i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI84_0 -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI84_0] +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: mov w9, #0 // =0x0 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmov s2, w9 +; CHECK-GI-NEXT: mov v3.16b, v1.16b +; CHECK-GI-NEXT: mov v3.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v3.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v3.b[3], v1.b[0] +; CHECK-GI-NEXT: ushll v1.8h, v3.8b, #0 ; CHECK-GI-NEXT: shl v1.4h, v1.4h, #15 ; CHECK-GI-NEXT: sshr v1.4h, v1.4h, #15 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b @@ -1137,8 +1144,16 @@ define <4 x i32> @vselect_constant_cond_zero_v4i32(<4 x i32> %a) { ; ; CHECK-GI-LABEL: vselect_constant_cond_zero_v4i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI85_0 -; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI85_0] +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: mov w9, #0 // =0x0 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmov s2, w9 +; CHECK-GI-NEXT: mov v3.16b, v1.16b +; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v1.h[0] +; CHECK-GI-NEXT: ushll v1.4s, v3.4h, #0 +; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b @@ -1181,8 +1196,15 @@ define <4 x i16> @vselect_constant_cond_v4i16(<4 x i16> %a, <4 x i16> %b) { ; ; CHECK-GI-LABEL: vselect_constant_cond_v4i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI87_0 -; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI87_0] +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: mov w9, #0 // =0x0 +; CHECK-GI-NEXT: fmov s2, w8 +; CHECK-GI-NEXT: fmov s3, w9 +; CHECK-GI-NEXT: mov v4.16b, v2.16b +; CHECK-GI-NEXT: mov v4.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v4.b[2], v3.b[0] +; CHECK-GI-NEXT: mov v4.b[3], v2.b[0] +; CHECK-GI-NEXT: ushll v2.8h, v4.8b, #0 ; CHECK-GI-NEXT: shl v2.4h, v2.4h, #15 ; CHECK-GI-NEXT: sshr v2.4h, v2.4h, #15 ; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b @@ -1201,8 +1223,16 @@ define <4 x i32> @vselect_constant_cond_v4i32(<4 x i32> %a, <4 x i32> %b) { ; ; CHECK-GI-LABEL: vselect_constant_cond_v4i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI88_0 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI88_0] +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: mov w9, #0 // =0x0 +; CHECK-GI-NEXT: fmov s2, w8 +; CHECK-GI-NEXT: fmov s3, w9 +; CHECK-GI-NEXT: mov v4.16b, v2.16b +; CHECK-GI-NEXT: mov v4.h[1], v3.h[0] +; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] +; CHECK-GI-NEXT: ushll v2.4s, v4.4h, #0 +; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0 +; CHECK-GI-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31 ; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll index 5287839ee7b70..9c8d3e0f07de8 100644 --- a/llvm/test/CodeGen/AArch64/shift.ll +++ b/llvm/test/CodeGen/AArch64/shift.ll @@ -1,13 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for shl_v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shl_v2i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_v2i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_v2i16 +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i1 @shl_i1(i1 %0, i1 %1){ ; CHECK-SD-LABEL: shl_i1: @@ -530,11 +523,38 @@ define <2 x i64> @lshr_v2i64(<2 x i64> %0, <2 x i64> %1){ ; ===== Vector Larger/Smaller than Legal ===== define <4 x i8> @shl_v4i8(<4 x i8> %0, <4 x i8> %1){ -; CHECK-LABEL: shl_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: bic v1.4h, #255, lsl #8 -; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8 +; CHECK-SD-NEXT: ushl v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov h2, v0.h[1] +; CHECK-GI-NEXT: mov h3, v1.h[1] +; CHECK-GI-NEXT: mov h4, v0.h[2] +; CHECK-GI-NEXT: mov h5, v0.h[3] +; CHECK-GI-NEXT: mov h6, v1.h[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov h2, v1.h[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v3.b[0] +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %3 = shl <4 x i8> %0, %1 ret <4 x i8> %3 } @@ -556,12 +576,27 @@ define <32 x i8> @shl_v32i8(<32 x i8> %0, <32 x i8> %1){ } define <2 x i16> @shl_v2i16(<2 x i16> %0, <2 x i16> %1){ -; CHECK-LABEL: shl_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d2, #0x00ffff0000ffff -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b -; CHECK-NEXT: ushl v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff +; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-SD-NEXT: ushl v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov s2, v0.s[1] +; CHECK-GI-NEXT: mov s3, v1.s[1] +; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %3 = shl <2 x i16> %0, %1 ret <2 x i16> %3 } @@ -633,14 +668,42 @@ define <4 x i64> @shl_v4i64(<4 x i64> %0, <4 x i64> %1){ } define <4 x i8> @ashr_v4i8(<4 x i8> %0, <4 x i8> %1){ -; CHECK-LABEL: ashr_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: bic v1.4h, #255, lsl #8 -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: neg v1.4h, v1.4h -; CHECK-NEXT: sshl v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ashr_v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8 +; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: neg v1.4h, v1.4h +; CHECK-SD-NEXT: sshl v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ashr_v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov h2, v1.h[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov h3, v0.h[1] +; CHECK-GI-NEXT: mov h4, v1.h[2] +; CHECK-GI-NEXT: mov h5, v1.h[3] +; CHECK-GI-NEXT: mov h6, v0.h[3] +; CHECK-GI-NEXT: mov v1.b[1], v2.b[0] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov v0.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v6.b[0] +; CHECK-GI-NEXT: neg v1.8b, v1.8b +; CHECK-GI-NEXT: sshl v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v3.b[0] +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %3 = ashr <4 x i8> %0, %1 ret <4 x i8> %3 } @@ -658,15 +721,31 @@ define <32 x i8> @ashr_v32i8(<32 x i8> %0, <32 x i8> %1){ } define <2 x i16> @ashr_v2i16(<2 x i16> %0, <2 x i16> %1){ -; CHECK-LABEL: ashr_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d2, #0x00ffff0000ffff -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b -; CHECK-NEXT: neg v1.2s, v1.2s -; CHECK-NEXT: sshl v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ashr_v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-SD-NEXT: neg v1.2s, v1.2s +; CHECK-SD-NEXT: sshl v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ashr_v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov s2, v1.s[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s3, v0.s[1] +; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NEXT: mov v0.h[1], v3.h[0] +; CHECK-GI-NEXT: neg v1.4h, v1.4h +; CHECK-GI-NEXT: sshl v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %3 = ashr <2 x i16> %0, %1 ret <2 x i16> %3 } @@ -727,13 +806,41 @@ define <4 x i64> @ashr_v4i64(<4 x i64> %0, <4 x i64> %1){ } define <4 x i8> @lshr_v4i8(<4 x i8> %0, <4 x i8> %1){ -; CHECK-LABEL: lshr_v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: bic v1.4h, #255, lsl #8 -; CHECK-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-NEXT: neg v1.4h, v1.4h -; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: lshr_v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8 +; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-SD-NEXT: neg v1.4h, v1.4h +; CHECK-SD-NEXT: ushl v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: lshr_v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov h2, v1.h[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov h3, v0.h[1] +; CHECK-GI-NEXT: mov h4, v1.h[2] +; CHECK-GI-NEXT: mov h5, v1.h[3] +; CHECK-GI-NEXT: mov h6, v0.h[3] +; CHECK-GI-NEXT: mov v1.b[1], v2.b[0] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov v0.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v6.b[0] +; CHECK-GI-NEXT: neg v1.8b, v1.8b +; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v3.b[0] +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %3 = lshr <4 x i8> %0, %1 ret <4 x i8> %3 } @@ -751,14 +858,30 @@ define <32 x i8> @lshr_v32i8(<32 x i8> %0, <32 x i8> %1){ } define <2 x i16> @lshr_v2i16(<2 x i16> %0, <2 x i16> %1){ -; CHECK-LABEL: lshr_v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: movi d2, #0x00ffff0000ffff -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: neg v1.2s, v1.2s -; CHECK-NEXT: ushl v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: lshr_v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff +; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-SD-NEXT: neg v1.2s, v1.2s +; CHECK-SD-NEXT: ushl v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: lshr_v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov s2, v1.s[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s3, v0.s[1] +; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NEXT: mov v0.h[1], v3.h[0] +; CHECK-GI-NEXT: neg v1.4h, v1.4h +; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %3 = lshr <2 x i16> %0, %1 ret <2 x i16> %3 }