diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 0ef933f596604..54af105383be0 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2487,21 +2487,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask)))) return false; + // Check whether this is a binary shuffle. + bool IsBinaryShuffle = !isa(V1); + auto *C0 = dyn_cast(V0); auto *C1 = dyn_cast(V1); - if (!C0 || !C1) + if (!C0 || (IsBinaryShuffle && !C1)) return false; Instruction::CastOps Opcode = C0->getOpcode(); - if (C0->getSrcTy() != C1->getSrcTy()) + + // If this is allowed, foldShuffleOfCastops can get stuck in a loop + // with foldBitcastOfShuffle. Reject in favor of foldBitcastOfShuffle. + if (!IsBinaryShuffle && Opcode == Instruction::BitCast) return false; - // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds. - if (Opcode != C1->getOpcode()) { - if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value()))) - Opcode = Instruction::SExt; - else + if (IsBinaryShuffle) { + if (C0->getSrcTy() != C1->getSrcTy()) return false; + // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds. + if (Opcode != C1->getOpcode()) { + if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value()))) + Opcode = Instruction::SExt; + else + return false; + } } auto *ShuffleDstTy = dyn_cast(I.getType()); @@ -2544,23 +2554,31 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { InstructionCost CostC0 = TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy, TTI::CastContextHint::None, CostKind); - InstructionCost CostC1 = - TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy, - TTI::CastContextHint::None, CostKind); - InstructionCost OldCost = CostC0 + CostC1; - OldCost += - TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy, - CastDstTy, OldMask, CostKind, 0, nullptr, {}, &I); - InstructionCost NewCost = - TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, NewShuffleDstTy, - CastSrcTy, NewMask, CostKind); + TargetTransformInfo::ShuffleKind ShuffleKind; + if (IsBinaryShuffle) + ShuffleKind = TargetTransformInfo::SK_PermuteTwoSrc; + else + ShuffleKind = TargetTransformInfo::SK_PermuteSingleSrc; + + InstructionCost OldCost = CostC0; + OldCost += TTI.getShuffleCost(ShuffleKind, ShuffleDstTy, CastDstTy, OldMask, + CostKind, 0, nullptr, {}, &I); + + InstructionCost NewCost = TTI.getShuffleCost(ShuffleKind, NewShuffleDstTy, + CastSrcTy, NewMask, CostKind); NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy, TTI::CastContextHint::None, CostKind); if (!C0->hasOneUse()) NewCost += CostC0; - if (!C1->hasOneUse()) - NewCost += CostC1; + if (IsBinaryShuffle) { + InstructionCost CostC1 = + TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy, + TTI::CastContextHint::None, CostKind); + OldCost += CostC1; + if (!C1->hasOneUse()) + NewCost += CostC1; + } LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost @@ -2568,14 +2586,20 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { if (NewCost > OldCost) return false; - Value *Shuf = Builder.CreateShuffleVector(C0->getOperand(0), - C1->getOperand(0), NewMask); + Value *Shuf; + if (IsBinaryShuffle) + Shuf = Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0), + NewMask); + else + Shuf = Builder.CreateShuffleVector(C0->getOperand(0), NewMask); + Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy); // Intersect flags from the old casts. if (auto *NewInst = dyn_cast(Cast)) { NewInst->copyIRFlags(C0); - NewInst->andIRFlags(C1); + if (IsBinaryShuffle) + NewInst->andIRFlags(C1); } Worklist.pushValue(Shuf); diff --git a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll index 6341c8945247d..1503a1b51d256 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll @@ -14,9 +14,9 @@ define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -36,9 +36,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_both_nneg(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -58,9 +58,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -80,9 +80,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_outer_nneg(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -102,9 +102,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg_outer_sext(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext nneg <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -125,9 +125,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_clobber_after_load(ptr %di) { ; CHECK-NEXT: call void @use.i32(i32 0) ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -148,9 +148,9 @@ define <4 x i32> @load_i32_sext_zext_to_v4i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -170,9 +170,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_load_other_users(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: call void @use.i32(i32 [[L]]) ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; @@ -194,9 +194,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_ins_other_users(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: call void @use.v2i32(<2 x i32> [[VEC_INS]]) ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; @@ -218,9 +218,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_bc_other_users(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: call void @use.v8i8(<8 x i8> [[VEC_BC]]) ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; @@ -266,10 +266,10 @@ define <4 x i32> @load_i32_zext_to_v4i32_shuffle_other_users(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> -; CHECK-NEXT: call void @use.v8i16(<4 x i16> [[VEC_SHUFFLE]]) +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[E_1]] to <4 x i32> +; CHECK-NEXT: call void @use.v8i16(<4 x i16> [[E_1]]) ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -290,9 +290,9 @@ define <8 x i32> @load_i64_zext_to_v8i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[DI]], align 8 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i64> , i64 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = zext <16 x i8> [[VEC_BC]] to <16 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i16> [[EXT_1]], <16 x i16> poison, <8 x i32> -; CHECK-NEXT: [[OUTER_EXT:%.*]] = zext nneg <8 x i16> [[VEC_SHUFFLE]] to <8 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i8> [[VEC_BC]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_SHUFFLE]] to <8 x i16> +; CHECK-NEXT: [[OUTER_EXT:%.*]] = zext nneg <8 x i16> [[EXT_1]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[OUTER_EXT]] ; entry: @@ -312,9 +312,9 @@ define <3 x i32> @load_i24_zext_to_v3i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i24> , i24 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = zext <6 x i8> [[VEC_BC]] to <6 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i16> [[EXT_1]], <6 x i16> poison, <3 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <3 x i16> [[VEC_SHUFFLE]] to <3 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i8> [[VEC_BC]], <6 x i8> poison, <3 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = zext <3 x i8> [[VEC_SHUFFLE]] to <3 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <3 x i16> [[EXT_1]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT_2]] ; entry: @@ -334,9 +334,9 @@ define <4 x i32> @load_i32_insert_idx_1_sext(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 1 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -356,9 +356,9 @@ define <4 x i32> @mask_extracts_not_all_elements_1_sext(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -378,9 +378,9 @@ define <4 x i32> @mask_extracts_not_all_elements_2_sext(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = zext nneg <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -422,9 +422,9 @@ define <4 x i32> @load_i32_sext_to_v4i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[E_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[E_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[E_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -444,9 +444,9 @@ define <8 x i32> @load_i64_sext_to_v8i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[DI]], align 8 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i64> , i64 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = sext <16 x i8> [[VEC_BC]] to <16 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i16> [[EXT_1]], <16 x i16> poison, <8 x i32> -; CHECK-NEXT: [[OUTER_EXT:%.*]] = sext <8 x i16> [[VEC_SHUFFLE]] to <8 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i8> [[VEC_BC]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_SHUFFLE]] to <8 x i16> +; CHECK-NEXT: [[OUTER_EXT:%.*]] = sext <8 x i16> [[EXT_1]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[OUTER_EXT]] ; entry: @@ -466,9 +466,9 @@ define <3 x i32> @load_i24_sext_to_v3i32(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i24> , i24 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = sext <6 x i8> [[VEC_BC]] to <6 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i16> [[EXT_1]], <6 x i16> poison, <3 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <3 x i16> [[VEC_SHUFFLE]] to <3 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i8> [[VEC_BC]], <6 x i8> poison, <3 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = sext <3 x i8> [[VEC_SHUFFLE]] to <3 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <3 x i16> [[EXT_1]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT_2]] ; entry: @@ -488,9 +488,9 @@ define <4 x i32> @load_i32_insert_idx_1(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 1 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -510,9 +510,9 @@ define <4 x i32> @mask_extracts_not_all_elements_1(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: @@ -532,9 +532,9 @@ define <4 x i32> @mask_extracts_not_all_elements_2(ptr %di) { ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[DI]], align 4 ; CHECK-NEXT: [[VEC_INS:%.*]] = insertelement <2 x i32> , i32 [[L]], i64 0 ; CHECK-NEXT: [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8> -; CHECK-NEXT: [[EXT_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16> -; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[EXT_1]], <8 x i16> poison, <4 x i32> -; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[VEC_BC]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXT_1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: [[EXT_2:%.*]] = sext <4 x i16> [[EXT_1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[EXT_2]] ; entry: diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index acbc836ffcab0..ed29719d49493 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -205,8 +205,8 @@ define <8 x i8> @abs_different(<8 x i8> %a) { define <4 x i32> @poison_intrinsic(<2 x i16> %l256) { ; CHECK-LABEL: @poison_intrinsic( ; CHECK-NEXT: [[L266:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> [[L256:%.*]], i1 false) -; CHECK-NEXT: [[L267:%.*]] = zext <2 x i16> [[L266]] to <2 x i32> -; CHECK-NEXT: [[L271:%.*]] = shufflevector <2 x i32> [[L267]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[L267:%.*]] = shufflevector <2 x i16> [[L266]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[L271:%.*]] = zext <4 x i16> [[L267]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[L271]] ; %l266 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %l256, i1 false) @@ -534,9 +534,9 @@ define <4 x i64> @single_zext(<4 x i32> %x) { define <4 x i64> @not_zext(<4 x i32> %x) { ; CHECK-LABEL: @not_zext( -; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[X:%.*]] to <4 x i64> -; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: ret <4 x i64> [[REVSHUF]] +; CHECK-NEXT: [[REVSHUF:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[REVSHUF:%.*]] to <4 x i64> +; CHECK-NEXT: ret <4 x i64> [[ZEXT]] ; %zext = zext <4 x i32> %x to <4 x i64> %revshuf = shufflevector <4 x i64> %zext, <4 x i64> poison, <4 x i32> @@ -922,10 +922,9 @@ define <4 x i8> @singleop(<4 x i8> %a, <4 x i8> %b) { define <4 x i64> @cast_mismatched_types(<4 x i32> %x) { ; CHECK-LABEL: @cast_mismatched_types( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i32> [[SHUF]] to <2 x i64> -; CHECK-NEXT: [[EXTSHUF:%.*]] = shufflevector <2 x i64> [[ZEXT]], <2 x i64> poison, <4 x i32> -; CHECK-NEXT: ret <4 x i64> [[EXTSHUF]] +; CHECK-SAME: <4 x i32> [[X:%.*]]) { +; CHECK-NEXT: [[ZEXT:%.*]] = zext <4 x i32> [[X]] to <4 x i64> +; CHECK-NEXT: ret <4 x i64> [[ZEXT]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> %zext = zext <2 x i32> %shuf to <2 x i64> diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index fba4b60ef417b..82a739964c9d0 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -342,3 +342,59 @@ define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> ret <16 x i32> %r } + +; Unary shuffles + +define <4 x i16> @unary_shuffle_zext_v8i8_v4i16(<8 x i8> %a0) { +; CHECK-LABEL: define <4 x i16> @unary_shuffle_zext_v8i8_v4i16( +; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[X1:%.*]] = zext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: ret <4 x i16> [[X1]] +; + %x1 = zext <8 x i8> %a0 to <8 x i16> + %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> + ret <4 x i16> %vec.shuffle +} + +define <4 x i16> @unary_shuffle_sext_v8i8_v4i16(<8 x i8> %a0) { +; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16( +; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i8> [[A0]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[X1:%.*]] = sext <4 x i8> [[VEC_SHUFFLE]] to <4 x i16> +; CHECK-NEXT: ret <4 x i16> [[X1]] +; + %x1 = sext <8 x i8> %a0 to <8 x i16> + %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> + ret <4 x i16> %vec.shuffle +} + +; negative - avoid loop with foldBitcastOfShuffle + +define <2 x i32> @unary_shuffle_bitcast_v8i8_v2i32(<8 x i8> %a0) { +; CHECK-LABEL: define <2 x i32> @unary_shuffle_bitcast_v8i8_v2i32( +; CHECK-SAME: <8 x i8> [[A0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X1:%.*]] = bitcast <8 x i8> [[A0]] to <2 x i32> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <2 x i32> [[X1]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[VEC_SHUFFLE]] +; + %x1 = bitcast <8 x i8> %a0 to <2 x i32> + %vec.shuffle = shufflevector <2 x i32> %x1, <2 x i32> poison, <2 x i32> + ret <2 x i32> %vec.shuffle +} + +; negative - multiuse + +define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_multiuse(<8 x i8> %a0, ptr %a1) { +; CHECK-LABEL: define <4 x i16> @unary_shuffle_sext_v8i8_v4i16_multiuse( +; CHECK-SAME: <8 x i8> [[A0:%.*]], ptr [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X1:%.*]] = sext <8 x i8> [[A0]] to <8 x i16> +; CHECK-NEXT: [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[X1]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: store <8 x i16> [[X1]], ptr [[A1]], align 16 +; CHECK-NEXT: ret <4 x i16> [[VEC_SHUFFLE]] +; + %x1 = sext <8 x i8> %a0 to <8 x i16> + %vec.shuffle = shufflevector <8 x i16> %x1, <8 x i16> poison, <4 x i32> + store <8 x i16> %x1, ptr %a1, align 16 + ret <4 x i16> %vec.shuffle +}