diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6411f27da0776..18f6a695e4502 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -15293,6 +15293,12 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
     for (int i = 0; i != NumElts; i += NumBroadcastElts)
       for (int j = 0; j != NumBroadcastElts; ++j)
         BroadcastMask[i + j] = j;
+
+    // Bail out rather than return the same shuffle operation. For example,
+    // v8i32 = vector_shuffle<0,1,0,1,0,1,0,1> t5, undef:v8i32
+    if (BroadcastMask == Mask)
+      return SDValue();
+
     return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT),
                                 BroadcastMask);
   }
diff --git a/llvm/test/CodeGen/X86/shuffle-combine-crash-5.ll b/llvm/test/CodeGen/X86/shuffle-combine-crash-5.ll
new file mode 100644
index 0000000000000..f012c05a09573
--- /dev/null
+++ b/llvm/test/CodeGen/X86/shuffle-combine-crash-5.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw,avx512vl | FileCheck %s
+
+define i1 @test(ptr %q) {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
+; CHECK-NEXT: vptest %ymm0, %ymm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+  %0 = load i64, ptr %q, align 8
+  %add = add nsw i64 %0, 0
+  %add2 = add nsw i64 %add, 0
+  %add5 = add nsw i64 %add2, 0
+  %vecinit1.i.i68 = insertelement <2 x i64> poison, i64 %add5, i64 0
+  %add8 = add nsw i64 %add5, 0
+  %vecinit.i.i55 = insertelement <4 x i64> undef, i64 %add8, i64 0
+  %1 = bitcast <2 x i64> %vecinit1.i.i68 to <4 x i32>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <8 x i32> ; NOTE(review): no shuffle mask operand visible after the type - confirm against the committed test
+  %3 = bitcast <4 x i64> %vecinit.i.i55 to <8 x i32>
+  %4 = shufflevector <8 x i32> %3, <8 x i32> poison, <8 x i32> ; NOTE(review): no shuffle mask operand visible after the type - confirm against the committed test
+  %5 = icmp ne <8 x i32> %2, %4
+  %6 = bitcast <8 x i1> %5 to i8
+  %7 = icmp eq i8 %6, 0
+  ret i1 %7
+}