From ed4c1dfb83a5d3d33f185559a4820b7d3fefb45e Mon Sep 17 00:00:00 2001 From: Chaitanya Koparkar Date: Sun, 21 Sep 2025 16:01:33 -0400 Subject: [PATCH 1/5] [VectorCombine] foldShuffleOfCastops - handle unary shuffles --- .../Transforms/Vectorize/VectorCombine.cpp | 67 ++++++++++++------- 1 file changed, 44 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 17cb18a22336a..c2a4353b3eb62 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2477,21 +2477,28 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask)))) return false; + // Check whether this is a unary shuffle. + // TODO: should this be extended to match undef or unused values. + bool IsBinaryShuffle = !isa(V1); + LLVM_DEBUG(dbgs() << "Is binary shuffle: " << IsBinaryShuffle << "\n"); + auto *C0 = dyn_cast(V0); auto *C1 = dyn_cast(V1); - if (!C0 || !C1) + if (!C0 || (IsBinaryShuffle && !C1)) return false; Instruction::CastOps Opcode = C0->getOpcode(); - if (C0->getSrcTy() != C1->getSrcTy()) - return false; - // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds. - if (Opcode != C1->getOpcode()) { - if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value()))) - Opcode = Instruction::SExt; - else + if (IsBinaryShuffle) { + if (C0->getSrcTy() != C1->getSrcTy()) return false; + // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds. + if (Opcode != C1->getOpcode()) { + if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value()))) + Opcode = Instruction::SExt; + else + return false; + } } auto *ShuffleDstTy = dyn_cast(I.getType()); @@ -2534,23 +2541,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { InstructionCost CostC0 = TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy, TTI::CastContextHint::None, CostKind); - InstructionCost CostC1 = - TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy, - TTI::CastContextHint::None, CostKind); - InstructionCost OldCost = CostC0 + CostC1; - OldCost += - TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy, - CastDstTy, OldMask, CostKind, 0, nullptr, {}, &I); - InstructionCost NewCost = - TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, NewShuffleDstTy, - CastSrcTy, NewMask, CostKind); + TargetTransformInfo::ShuffleKind ShuffleKind; + if (IsBinaryShuffle) + ShuffleKind = TargetTransformInfo::SK_PermuteTwoSrc; + else + ShuffleKind = TargetTransformInfo::SK_PermuteSingleSrc; + + InstructionCost OldCost = CostC0; + OldCost += TTI.getShuffleCost(ShuffleKind, ShuffleDstTy, CastDstTy, OldMask, + CostKind, 0, nullptr, {}, &I); + + InstructionCost NewCost = TTI.getShuffleCost(ShuffleKind, NewShuffleDstTy, + CastSrcTy, NewMask, CostKind); NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy, TTI::CastContextHint::None, CostKind); if (!C0->hasOneUse()) NewCost += CostC0; - if (!C1->hasOneUse()) - NewCost += CostC1; + if (IsBinaryShuffle) { + InstructionCost CostC1 = + TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy, + TTI::CastContextHint::None, CostKind); + OldCost += CostC1; + if (!C1->hasOneUse()) + NewCost += CostC1; + } LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost @@ -2558,14 +2573,20 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { if (NewCost > OldCost) return false; - Value *Shuf = Builder.CreateShuffleVector(C0->getOperand(0), - C1->getOperand(0), NewMask); + Value *Shuf; + if (IsBinaryShuffle) + Shuf = Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0), + NewMask); + else + Shuf = Builder.CreateShuffleVector(C0->getOperand(0), NewMask); + Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy); // Intersect flags from the old casts. if (auto *NewInst = dyn_cast(Cast)) { NewInst->copyIRFlags(C0); - NewInst->andIRFlags(C1); + if (IsBinaryShuffle) + NewInst->andIRFlags(C1); } Worklist.pushValue(Shuf); From 8ae981639ffb9b4e48b7452819d003e0c3b3dab9 Mon Sep 17 00:00:00 2001 From: Chaitanya Koparkar Date: Tue, 23 Sep 2025 08:50:25 -0400 Subject: [PATCH 2/5] Fix shuffletoidentity test --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 4 ++-- .../VectorCombine/AArch64/shuffletoidentity.ll | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 7c3c219b94190..526a4add2a89a 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2488,9 +2488,9 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { return false; // Check whether this is a unary shuffle. - // TODO: should this be extended to match undef or unused values. + // TODO: check if this can be extended to match undef or unused values, + // perhaps using ShuffleVectorInst::isSingleSource. bool IsBinaryShuffle = !isa(V1); - LLVM_DEBUG(dbgs() << "Is binary shuffle: " << IsBinaryShuffle << "\n"); auto *C0 = dyn_cast(V0); auto *C1 = dyn_cast(V1); diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index acbc836ffcab0..ed29719d49493 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -205,8 +205,8 @@ define <8 x i8> @abs_different(<8 x i8> %a) { define <4 x i32> @poison_intrinsic(<2 x i16> %l256) { ; CHECK-LABEL: @poison_intrinsic( ; CHECK-NEXT: [[L266:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> [[L256:%.*]], i1 false) -; CHECK-NEXT: [[L267:%.*]] = zext <2 x i16> [[L266]] to <2 x i32> -; CHECK-NEXT: [[L271:%.*]] = shufflevector <2 x i32> [[L267]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[L267:%.*]] = shufflevector <2 x i16> [[L266]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[L271:%.*]] = zext <4 x i16> [[L267]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[L271]] ; %l266 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %l256, i1 false) @@ -534,9 +534,9 @@ define <4 x i64> @single_zext(<4 x i32> %x) { define <4 x i64> @not_zext(<4 x i32> %x) { ; CHECK-LABEL: @not_zext( -; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[X:%.*]] to <4 x i64> -; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: ret <4 x i64> [[REVSHUF]] +; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[REVSHUF:%.*]] to <4 x i64> +; CHECK-NEXT: ret <4 x i64> [[ZEXT]] ; %zext = zext <4 x i32> %x to <4 x i64> %revshuf = shufflevector <4 x i64> %zext, <4 x i64> poison, <4 x i32> @@ -922,10 +922,9 @@ define <4 x i8> @singleop(<4 x i8> %a, <4 x i8> %b) { define <4 x i64> @cast_mismatched_types(<4 x i32> %x) { ; CHECK-LABEL: @cast_mismatched_types( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i32> [[SHUF]] to <2 x i64> -; CHECK-NEXT: [[EXTSHUF:%.*]] = shufflevector <2 x i64> [[ZEXT]], <2 x i64> poison, <4 x i32> -; CHECK-NEXT: ret <4 x i64> [[EXTSHUF]] +; CHECK-SAME: <4 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[X]] to <4 x i64> +; CHECK-NEXT: ret <4 x i64> [[ZEXT]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> %zext = zext <2 x i32> %shuf to <2 x i64> From 8c27278bd39af0ae2ffe768180da09239e4265fd Mon Sep 17 00:00:00 2001 From: Chaitanya Koparkar Date: Wed, 24 Sep 2025 08:40:05 -0400 Subject: [PATCH 3/5] Break loop with foldBitcastOfShuffle --- .../Transforms/Vectorize/VectorCombine.cpp | 6 + .../AArch64/combine-shuffle-ext.ll | 134 +++++++++--------- 2 files changed, 73 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 526a4add2a89a..2a9f637791dfe 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2499,6 +2499,12 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { Instruction::CastOps Opcode = C0->getOpcode(); + // If this is allowed, foldShuffleOfCastops can get stuck in a loop + // with foldBitcastOfShuffle. Reject in favor of foldBitcastOfShuffle. + if (!IsBinaryShuffle && Opcode == Instruction::BitCast) { + return false; + } + if (IsBinaryShuffle) { if (C0->getSrcTy() != C1->getSrcTy()) return false; diff --git a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll index 6341c8945247d..1503a1b51d256 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll @@ -14,9 +14,9 @@ define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -36,9 +36,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_both_nneg(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -58,9 +58,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -80,9 +80,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_outer_nneg(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -102,9 +102,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg_outer_sext(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -125,9 +125,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_clobber_after_load(ptr %di) { ; CHECK-NEXT: call void @use.i32(i32 0) ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -148,9 +148,9 @@ define <4 x i32> @load_i32_sext_zext_to_v4i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -170,9 +170,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_load_other_users(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: call void @use.i32(i32 [[L]]) ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; @@ -194,9 +194,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_ins_other_users(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: call void @use.v2i32(<2 x i32> [[VEC_INS]]) ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; @@ -218,9 +218,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_bc_other_users(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: call void @use.v8i8(<8 x i8> [[VEC_BC]]) ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; @@ -266,10 +266,10 @@ define <4 x i32> @load_i32_zext_to_v4i32_shuffle_other_users(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> -; CHECK-NEXT: call void @use.v8i16(<4 x i16> [[VEC_SHUFFLE]]) +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> +; CHECK-NEXT: call void @use.v8i16(<4 x i16> [[E_1]]) ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -290,9 +290,9 @@ define <8 x i32> @load_i64_zext_to_v8i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[DI]], align 8 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i64> , i64 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = zext <16 x i8> [[VEC_BC]] to <16 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i16> [[EXT_1]], <16 x i16> poison, <8 x i32> -; CHECK-NEXT: [[OUTER_EXT:%.*]] = zext nneg <8 x i16> [[VEC_SHUFFLE]] to <8 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i8> [[VEC_BC]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_SHUFFLE]] to <8 x i16> +; CHECK-NEXT: [[OUTER_EXT:%.*]] = zext nneg <8 x i16> [[EXT_1]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[OUTER_EXT]] ; entry: @@ -312,9 +312,9 @@ define <3 x i32> @load_i24_zext_to_v3i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i24> , i24 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = zext <6 x i8> [[VEC_BC]] to <6 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i16> [[EXT_1]], <6 x i16> poison, <3 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <3 x i16> [[VEC_SHUFFLE]] to <3 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i8> [[VEC_BC]], <6 x i8> poison, <3 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = zext <3 x i8> [[VEC_SHUFFLE]] to <3 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <3 x i16> [[EXT_1]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT_2]] ; entry: @@ -334,9 +334,9 @@ define <4 x i32> @load_i32_insert_idx_1_sext(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 1 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -356,9 +356,9 @@ define <4 x i32> @mask_extracts_not_all_elements_1_sext(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -378,9 +378,9 @@ define <4 x i32> @mask_extracts_not_all_elements_2_sext(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -422,9 +422,9 @@ define <4 x i32> @load_i32_sext_to_v4i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -444,9 +444,9 @@ define <8 x i32> @load_i64_sext_to_v8i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[DI]], align 8 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i64> , i64 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = sext <16 x i8> [[VEC_BC]] to <16 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i16> [[EXT_1]], <16 x i16> poison, <8 x i32> -; CHECK-NEXT: [[OUTER_EXT:%.*]] = sext <8 x i16> [[VEC_SHUFFLE]] to <8 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i8> [[VEC_BC]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_SHUFFLE]] to <8 x i16> +; CHECK-NEXT: [[OUTER_EXT:%.*]] = sext <8 x i16> [[EXT_1]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[OUTER_EXT]] ; entry: @@ -466,9 +466,9 @@ define <3 x i32> @load_i24_sext_to_v3i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i24> , i24 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = sext <6 x i8> [[VEC_BC]] to <6 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i16> [[EXT_1]], <6 x i16> poison, <3 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <3 x i16> [[VEC_SHUFFLE]] to <3 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i8> [[VEC_BC]], <6 x i8> poison, <3 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = sext <3 x i8> [[VEC_SHUFFLE]] to <3 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <3 x i16> [[EXT_1]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT_2]] ; entry: @@ -488,9 +488,9 @@ define <4 x i32> @load_i32_insert_idx_1(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 1 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -510,9 +510,9 @@ define <4 x i32> @mask_extracts_not_all_elements_1(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -532,9 +532,9 @@ define <4 x i32> @mask_extracts_not_all_elements_2(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: From e4b329f2fba9bf042f790a7b9cfca4acf1c35aec Mon Sep 17 00:00:00 2001 From: Chaitanya Koparkar Date: Thu, 25 Sep 2025 06:56:16 -0400 Subject: [PATCH 4/5] Add tests, remove comment --- .../Transforms/Vectorize/VectorCombine.cpp | 9 +-- .../VectorCombine/X86/shuffle-of-casts.ll | 68 +++++++++++++++++++ 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 2a9f637791dfe..54af105383be0 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2487,10 +2487,8 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask)))) return false; - // Check whether this is a unary shuffle. - // TODO: check if this can be extended to match undef or unused values, - // perhaps using ShuffleVectorInst::isSingleSource. - bool IsBinaryShuffle = !isa(V1); + // Check whether this is a binary shuffle. + bool IsBinaryShuffle = !isa(V1); auto *C0 = dyn_cast(V0); auto *C1 = dyn_cast(V1); @@ -2501,9 +2499,8 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { // If this is allowed, foldShuffleOfCastops can get stuck in a loop // with foldBitcastOfShuffle. Reject in favor of foldBitcastOfShuffle. - if (!IsBinaryShuffle && Opcode == Instruction::BitCast) { + if (!IsBinaryShuffle && Opcode == Instruction::BitCast) return false; - } if (IsBinaryShuffle) { if (C0->getSrcTy() != C1->getSrcTy()) diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index fba4b60ef417b..ef2b88da50891 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -342,3 +342,71 @@ define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> ret <16 x i32> %r } + +; Unary shuffles + +define <4 x i16> @unary_shuffle_zext_v8i8_v4i16(<8 x i8> %a0) { +; CHECK-LABEL: define <4 x i16> @unary_shuffle_zext_v8i8_v4i16( +; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[X1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: ret <4 x i16> [[X1]] +; + %x1 = zext <8 x i8> %a0 to <8 x i16> + %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> + ret <4 x i16> %vec.shuffle +} + +define <4 x i16> @unary_shuffle_sext_v8i8_v4i16(<8 x i8> %a0) { +; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16( +; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[X1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: ret <4 x i16> [[X1]] +; + %x1 = sext <8 x i8> %a0 to <8 x i16> + %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> + ret <4 x i16> %vec.shuffle +} + +define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_undef(<8 x i8> %a0) { +; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_undef( +; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[X1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: ret <4 x i16> [[X1]] +; + %x1 = sext <8 x i8> %a0 to <8 x i16> + %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> undef, <4 x i32> + ret <4 x i16> %vec.shuffle +} + +; negative - avoid loop with foldBitcastOfShuffle + +define <2 x i32> @unary_shuffle_bitcast_v8i8_v2i32(<8 x i8> %a0) { +; CHECK-LABEL: define <2 x i32> @unary_shuffle_bitcast_v8i8_v2i32( +; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X1:%.*]] = bitcast <8 x i8> [[A0]] to <2 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <2 x i32> [[X1]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[VEC_SHUFFLE]] +; + %x1 = bitcast <8 x i8> %a0 to <2 x i32> + %vec.shuffle = shufflevector <2 x i32> %x1, <2 x i32> poison, <2 x i32> + ret <2 x i32> %vec.shuffle +} + +; negative - multiuse + +define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_multiuse(<8 x i8> %a0, ptr %a1) { +; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_multiuse( +; CHECK-SAME: <8 x i8> [[A0:%.*]], ptr [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X1:%.*]] = sext <8 x i8> [[A0]] to <8 x i16> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[X1]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: store <8 x i16> [[X1]], ptr [[A1]], align 16 +; CHECK-NEXT: ret <4 x i16> [[VEC_SHUFFLE]] +; + %x1 = sext <8 x i8> %a0 to <8 x i16> + %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> + store <8 x i16> %x1, ptr %a1, align 16 + ret <4 x i16> %vec.shuffle +} From 4f5f5da95a76f1c2aabeb1a4753536426d62e529 Mon Sep 17 00:00:00 2001 From: Chaitanya Koparkar Date: Thu, 25 Sep 2025 07:23:13 -0400 Subject: [PATCH 5/5] Remove test that uses undef --- .../Transforms/VectorCombine/X86/shuffle-of-casts.ll | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index ef2b88da50891..82a739964c9d0 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -369,18 +369,6 @@ define <4 x i16> @unary_shuffle_sext_v8i8_v4i16(<8 x i8> %a0) { ret <4 x i16> %vec.shuffle } -define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_undef(<8 x i8> %a0) { -; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_undef( -; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[X1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> -; CHECK-NEXT: ret <4 x i16> [[X1]] -; - %x1 = sext <8 x i8> %a0 to <8 x i16> - %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> undef, <4 x i32> - ret <4 x i16> %vec.shuffle -} - ; negative - avoid loop with foldBitcastOfShuffle define <2 x i32> @unary_shuffle_bitcast_v8i8_v2i32(<8 x i8> %a0) {