diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2f8fe09c3dc98..3fdb9bf7e5171 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -26068,9 +26068,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If the bitcast type isn't legal, it might be a trunc of a legal type; // look through the trunc so we can still do the transform: // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) + // However, this is only equivalent on little-endian targets. if (Scalar->getOpcode() == ISD::TRUNCATE && !TLI.isTypeLegal(Scalar.getValueType()) && - TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) + TLI.isTypeLegal(Scalar->getOperand(0).getValueType()) && + DAG.getDataLayout().isLittleEndian()) Scalar = Scalar->getOperand(0); EVT SclTy = Scalar.getValueType(); diff --git a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll new file mode 100644 index 0000000000000..42d787d945145 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +; Test that truncated scalars use the correct vector insert instruction. +; On big-endian targets, concat_vectors should not skip truncates when +; creating scalar_to_vector, as the bytes would be in the wrong position. + +; This truncated i16 should use vlvgh (insert halfword), not vlvgf (insert fullword). 
+define <16 x i8> @test_concat_trunc_i16(i32 %x) {
+; CHECK-LABEL: test_concat_trunc_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlvgh %v24, %r2, 0
+; CHECK-NEXT: br %r14
+  %t = trunc i32 %x to i16 ; i16 truncate whose result is reinterpreted as a vector below
+  %vec = bitcast i16 %t to <2 x i8>
+  %result = shufflevector <2 x i8> %vec, <2 x i8> poison, <16 x i32> ; widens <2 x i8> to <16 x i8>. NOTE(review): no mask constant follows the <16 x i32> type here - confirm against the upstream test
+  ret <16 x i8> %result
+}
+
+; Test with a more complex shuffle pattern, reduced from a Rust bug report: the CHECK lines expect the i16 load to be inserted with vlvgh and the i32 load with vlvgf.
+define fastcc void @test_shuffle_with_trunc() {
+; CHECK-LABEL: test_shuffle_with_trunc:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lh %r1, 0
+; CHECK-NEXT: l %r0, 0
+; CHECK-NEXT: vlvgh %v1, %r1, 0
+; CHECK-NEXT: larl %r1, .LCPI1_0
+; CHECK-NEXT: vl %v2, 0(%r1), 3
+; CHECK-NEXT: vlvgf %v0, %r0, 0
+; CHECK-NEXT: vperm %v0, %v0, %v1, %v2
+; CHECK-NEXT: vst %v0, 0, 3
+; CHECK-NEXT: br %r14
+  %1 = load i32, ptr null, align 8 ; i32 value; the CHECK lines expect it to be inserted with vlvgf
+  %2 = load i16, ptr null, align 1 ; i16 value; the CHECK lines expect it to be inserted with vlvgh
+  br label %3
+
+3:
+  %4 = bitcast i32 %1 to <4 x i8>
+  %5 = shufflevector <4 x i8> %4, <4 x i8> zeroinitializer, <16 x i32> ; NOTE(review): mask constant missing after <16 x i32> on this and the two shufflevectors below - confirm against the upstream test
+  %6 = bitcast i16 %2 to <2 x i8>
+  %7 = shufflevector <2 x i8> %6, <2 x i8> zeroinitializer, <16 x i32>
+  %8 = shufflevector <16 x i8> %5, <16 x i8> %7, <16 x i32> ; merges the two widened vectors before the store
+  store <16 x i8> %8, ptr null, align 8
+  ret void
+}