From 73177d580f88ddafea174a62be6edcfafb5e862a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 29 Sep 2025 18:07:52 +0100 Subject: [PATCH] [X86] canCreateUndefOrPoisonForTargetNode/isGuaranteedNotToBeUndefOrPoisonForTargetNode - add X86ISD::INSERTPS handling X86ISD::INSERTPS shuffles can't create undef/poison itself, allowing us to fold freeze(insertps(x,y,i)) -> insertps(freeze(x),freeze(y),i) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 ++ llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 292eab77e2002..cd04ff5bc7ef4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -45169,6 +45169,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode( case X86ISD::Wrapper: case X86ISD::WrapperRIP: return true; + case X86ISD::INSERTPS: case X86ISD::BLENDI: case X86ISD::PSHUFB: case X86ISD::PSHUFD: @@ -45239,6 +45240,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode( case X86ISD::BLENDV: return false; // SSE target shuffles. + case X86ISD::INSERTPS: case X86ISD::PSHUFB: case X86ISD::PSHUFD: case X86ISD::UNPCKL: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll index bec33492bbf1e..3590c4d027be7 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll @@ -62,15 +62,12 @@ define <4 x i32> @combine_blend_of_permutes_v4i32(<2 x i64> %a0, <2 x i64> %a1) define <4 x float> @freeze_insertps(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: freeze_insertps: ; SSE: # %bb.0: -; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; SSE-NEXT: insertps {{.*#+}} xmm1 = xmm0[1],xmm1[1,2,3] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: freeze_insertps: ; AVX: # %bb.0: -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],xmm1[1,2,3] +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %s0 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 16) %f0 = freeze <4 x float> %s0