diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6a0a5aa4ba415..ddda8448b3099 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5214,17 +5214,21 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
   SmallDenseMap<unsigned, SDValue> Values;
   for (unsigned I : seq<unsigned>(Data.size())) {
     const auto &[Idx1, Idx2, _] = Data[I];
-    if (Values.contains(Idx1)) {
-      assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
-             "Expected both indices to be extracted already.");
-      break;
+    // If the shuffle contains a permutation of an odd number of elements,
+    // Idx1 might already be used in the first iteration.
+    //
+    // Idx1 = shuffle Idx1, Idx2
+    // Idx1 = shuffle Idx1, Idx3
+    SDValue &V = Values.try_emplace(Idx1).first->getSecond();
+    if (!V)
+      V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
+                       (Idx1 % NumOfSrcRegs) * NumOpElts);
+    if (Idx2 != UINT_MAX) {
+      SDValue &V = Values.try_emplace(Idx2).first->getSecond();
+      if (!V)
+        V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
+                         (Idx2 % NumOfSrcRegs) * NumOpElts);
     }
-    SDValue V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
-                             (Idx1 % NumOfSrcRegs) * NumOpElts);
-    Values[Idx1] = V;
-    if (Idx2 != UINT_MAX)
-      Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
-                                  (Idx2 % NumOfSrcRegs) * NumOpElts);
   }
   SDValue V;
   for (const auto &[Idx1, Idx2, Mask] : Data) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index afd560fd74d16..c0c17d4e0623e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -431,3 +431,31 @@ define void @shuffle_i256_ldst(ptr %p) vscale_range(2,2) {
   store <4 x i256> %res, ptr %p
   ret void
 }
+
+define void @shuffle_3_input_vectors() vscale_range(4,4) {
+; CHECK-LABEL: shuffle_3_input_vectors:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 1
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.i v0, 6
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; CHECK-NEXT:    vslidedown.vi v20, v8, 1, v0.t
+; CHECK-NEXT:    vslideup.vi v20, v9, 3
+; CHECK-NEXT:    vslidedown.vi v21, v9, 1
+; CHECK-NEXT:    vmv1r.v v22, v8
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmsgt.vi v8, v16, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    sb a0, 0(zero)
+; CHECK-NEXT:    ret
+  %1 = shufflevector <32 x i64> zeroinitializer, <32 x i64> splat (i64 1), <32 x i32>
+  %2 = icmp slt <32 x i64> zeroinitializer, %1
+  %3 = bitcast <32 x i1> %2 to i32
+  %4 = trunc i32 %3 to i8
+  store i8 %4, ptr null, align 1
+  ret void
+}
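
For readers outside the patch context, the standalone sketch below (my illustration, not LLVM code: it uses std::map and int in place of SmallDenseMap and SDValue, and a hypothetical ExtractValue stub) shows why the removed early `break` was wrong when the same source-register index repeats across `Data` entries, and how the try_emplace-style caching extracts each register at most once without skipping the remaining entries.

// Sketch only; names and types are simplified stand-ins for the patched code.
#include <cassert>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

using Value = int; // stand-in for the extracted subvector (SDValue)

int main() {
  constexpr unsigned Unused = ~0u; // stand-in for UINT_MAX "no second index"

  // Each entry pairs up to two source-register indices, like `Data`. With a
  // permutation of an odd number of registers, the same first index can
  // legitimately repeat:
  //   Idx1 = shuffle Idx1, Idx2
  //   Idx1 = shuffle Idx1, Idx3
  // The removed code asserted and broke out of the loop on the repeated
  // Idx1, so register 2 below was never extracted.
  std::vector<std::pair<unsigned, unsigned>> Data = {{0, 1}, {0, 2}};

  auto ExtractValue = [](unsigned Idx) -> Value {
    std::cout << "extract register " << Idx << "\n"; // runs once per index
    return static_cast<Value>(Idx + 100);
  };

  std::map<unsigned, Value> Values;
  for (auto [Idx1, Idx2] : Data) {
    // try_emplace-style caching: extract each source register at most once,
    // but keep processing every entry even when an index repeats.
    if (auto [It, Inserted] = Values.try_emplace(Idx1, 0); Inserted)
      It->second = ExtractValue(Idx1);
    if (Idx2 != Unused)
      if (auto [It, Inserted] = Values.try_emplace(Idx2, 0); Inserted)
        It->second = ExtractValue(Idx2);
  }
  assert(Values.size() == 3 && "all three source registers extracted");
  return 0;
}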