From 90bc448328bd815a3e891f02f719a26434dd6b58 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Dec 2025 11:44:11 +0000 Subject: [PATCH] [VectorCombine][X86] foldShuffleOfIntrinsics - provide the arguments to a getShuffleCost call Ensure the arguments are passed to the getShuffleCost calls to improve cost analysis; in particular, if these are constant, the costs will be recognised as free. Noticed while reviewing #170052 --- .../Transforms/Vectorize/VectorCombine.cpp | 5 +-- .../VectorCombine/X86/shuffle-of-fma-const.ll | 35 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index f1890e4f5fb95..e34a70c54ee4a 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2924,8 +2924,9 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) { auto *ArgTy = FixedVectorType::get(VecTy->getElementType(), ShuffleDstTy->getNumElements()); NewArgsTy.push_back(ArgTy); - NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - ArgTy, VecTy, OldMask, CostKind); + NewCost += TTI.getShuffleCost( + TargetTransformInfo::SK_PermuteTwoSrc, ArgTy, VecTy, OldMask, + CostKind, 0, nullptr, {II0->getArgOperand(I), II1->getArgOperand(I)}); } } IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy); diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll index 4710bdee11472..b05f851a846f4 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll @@ -15,18 +15,11 @@ define <4 x float> @shuffle_fma_const_chain(<4 x float> %a0) { } define <8 x float> @concat_fma_const_chain(<4 x float> %a0, <4 x float> %a1) { -; SSE-LABEL: define <8 x float> @concat_fma_const_chain( -; SSE-SAME: <4 x float> [[A0:%.*]], <4 
x float> [[A1:%.*]]) #[[ATTR0]] { -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> -; SSE-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000)) -; SSE-NEXT: ret <8 x float> [[RES]] -; -; AVX-LABEL: define <8 x float> @concat_fma_const_chain( -; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { -; AVX-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) -; AVX-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) -; AVX-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> -; AVX-NEXT: ret <8 x float> [[RES]] +; CHECK-LABEL: define <8 x float> @concat_fma_const_chain( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000)) +; CHECK-NEXT: ret <8 x float> [[RES]] ; %l = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) %h = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a1, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) @@ -35,12 +28,18 @@ define <8 x float> @concat_fma_const_chain(<4 x float> %a0, <4 x float> %a1) { } define <8 x float> @interleave_fma_const_chain(<4 x float> %a0, <4 x float> %a1) { -; CHECK-LABEL: define <8 x float> @interleave_fma_const_chain( -; CHECK-SAME: <4 x 
float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) -; CHECK-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> -; CHECK-NEXT: ret <8 x float> [[RES]] +; SSE-LABEL: define <8 x float> @interleave_fma_const_chain( +; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) +; SSE-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) +; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> +; SSE-NEXT: ret <8 x float> [[RES]] +; +; AVX-LABEL: define <8 x float> @interleave_fma_const_chain( +; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> +; AVX-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000)) +; AVX-NEXT: ret <8 x float> [[RES]] ; %l = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) %h = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a1, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))