diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e10b8bc8c5e2e..24f69ea1b742a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1455,6 +1455,9 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // First store the whole vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); + // Freeze the index so we don't poison the clamping code we're about to emit. + Idx = DAG.getFreeze(Idx); + // Then store the inserted part. if (PartVT.isVector()) { SDValue SubStackPtr = diff --git a/llvm/test/CodeGen/AArch64/pr86717.ll b/llvm/test/CodeGen/AArch64/pr86717.ll index aac0c0df5f51a..aa8be954be72d 100644 --- a/llvm/test/CodeGen/AArch64/pr86717.ll +++ b/llvm/test/CodeGen/AArch64/pr86717.ll @@ -10,10 +10,10 @@ define <16 x i8> @f(i32 %0) { ; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: sub w8, w8, w0 -; CHECK-NEXT: mov w10, #3 // =0x3 -; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: bfxil x9, x8, #0, #4 +; CHECK-NEXT: mov w8, #3 // =0x3 ; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: strb w10, [x8] +; CHECK-NEXT: strb w8, [x9] ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret %2 = sub nuw i32 1, %0 diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll index 25106b456d2f7..6629d34405492 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll @@ -123,9 +123,10 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0 -; CHECK-NEXT: st.b $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 4, 0 +; CHECK-NEXT: st.b $a2, $a3, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -149,9 +150,10 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1 -; CHECK-NEXT: st.h $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 4, 1 +; CHECK-NEXT: st.h $a2, $a3, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -175,9 +177,10 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2 -; CHECK-NEXT: st.w $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 4, 2 +; CHECK-NEXT: st.w $a2, $a3, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -201,9 +204,10 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3 -; CHECK-NEXT: st.d $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 4, 3 +; CHECK-NEXT: st.d $a2, $a3, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -227,9 +231,10 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr1, $a0, 0 ; CHECK-NEXT: xvst $xr1, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 -; CHECK-NEXT: fst.s $fa0, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 +; CHECK-NEXT: addi.d $a2, $sp, 0 +; CHECK-NEXT: bstrins.d $a2, $a0, 4, 2 +; CHECK-NEXT: fst.s $fa0, $a2, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -253,9 +258,10 @@ define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounw ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr1, $a0, 0 ; CHECK-NEXT: xvst $xr1, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 -; CHECK-NEXT: fst.d $fa0, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 +; CHECK-NEXT: addi.d $a2, $sp, 0 +; CHECK-NEXT: bstrins.d $a2, $a0, 4, 3 +; CHECK-NEXT: fst.d $fa0, $a2, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll index 7f232073ae129..19171b7d8ed78 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll @@ -87,9 +87,10 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0 -; CHECK-NEXT: st.b $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 3, 0 +; CHECK-NEXT: st.b $a2, $a3, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -106,9 +107,10 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1 -; CHECK-NEXT: st.h $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 3, 1 +; CHECK-NEXT: st.h $a2, $a3, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -125,9 +127,10 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2 -; CHECK-NEXT: st.w $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 3, 2 +; CHECK-NEXT: st.w $a2, $a3, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -144,9 +147,10 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3 -; CHECK-NEXT: st.d $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 3, 3 +; CHECK-NEXT: st.d $a2, $a3, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -163,9 +167,10 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr1, $a0, 0 ; CHECK-NEXT: vst $vr1, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2 -; CHECK-NEXT: fst.s $fa0, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 +; CHECK-NEXT: addi.d $a2, $sp, 0 +; CHECK-NEXT: bstrins.d $a2, $a0, 3, 2 +; CHECK-NEXT: fst.s $fa0, $a2, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -182,9 +187,10 @@ define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) noun ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr1, $a0, 0 ; CHECK-NEXT: vst $vr1, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3 -; CHECK-NEXT: fst.d $fa0, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 +; CHECK-NEXT: addi.d $a2, $sp, 0 +; CHECK-NEXT: bstrins.d $a2, $a0, 3, 3 +; CHECK-NEXT: fst.d $fa0, $a2, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 diff --git a/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll index 535450a52ff60..695a2d0cd806e 100644 --- a/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll +++ b/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll @@ -9,11 +9,11 @@ define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind read ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $32, %esp -; X86-NEXT: movzwl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: andl $7, %ecx +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movzwl 8(%ebp), %ecx +; X86-NEXT: andl $7, %eax ; X86-NEXT: movaps %xmm0, (%esp) -; X86-NEXT: movw %ax, (%esp,%ecx,2) +; X86-NEXT: movw %cx, (%esp,%eax,2) ; X86-NEXT: movaps (%esp), %xmm0 ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll index 8ed8495d7a461..5420e6b5ce86f 100644 --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -1009,18 +1009,19 @@ define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind { ; X86AVX2-NEXT: pushl %esi ; X86AVX2-NEXT: andl $-16, %esp ; X86AVX2-NEXT: subl $48, %esp -; X86AVX2-NEXT: movl 8(%ebp), %eax -; X86AVX2-NEXT: movl 12(%ebp), %ecx -; X86AVX2-NEXT: movl 16(%ebp), %edx +; X86AVX2-NEXT: movl 8(%ebp), %edx +; X86AVX2-NEXT: movl 12(%ebp), %eax +; X86AVX2-NEXT: movl 16(%ebp), %ecx ; X86AVX2-NEXT: vmovaps %xmm0, (%esp) -; X86AVX2-NEXT: leal (%edx,%edx), %esi +; X86AVX2-NEXT: addl %ecx, %ecx +; X86AVX2-NEXT: movl %ecx, %esi ; X86AVX2-NEXT: andl $3, %esi -; X86AVX2-NEXT: movl %eax, (%esp,%esi,4) +; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) ; X86AVX2-NEXT: vmovaps (%esp), %xmm0 ; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%edx,%edx), %eax -; X86AVX2-NEXT: andl $3, %eax -; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4) +; X86AVX2-NEXT: incl %ecx +; X86AVX2-NEXT: andl $3, %ecx +; X86AVX2-NEXT: movl %eax, 16(%esp,%ecx,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 ; X86AVX2-NEXT: leal -4(%ebp), %esp ; X86AVX2-NEXT: popl %esi @@ -1362,12 +1363,13 @@ define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind { ; X86AVX2-NEXT: movl (%ecx), %edx ; X86AVX2-NEXT: movl 4(%ecx), %ecx ; X86AVX2-NEXT: vmovaps %xmm0, (%esp) -; X86AVX2-NEXT: leal (%eax,%eax), %esi +; X86AVX2-NEXT: addl %eax, %eax +; X86AVX2-NEXT: movl %eax, %esi ; X86AVX2-NEXT: andl $3, %esi ; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) ; X86AVX2-NEXT: vmovaps (%esp), %xmm0 ; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%eax,%eax), %eax +; X86AVX2-NEXT: incl %eax ; X86AVX2-NEXT: andl $3, %eax ; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 @@ -1742,18 +1744,19 @@ define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind { ; X86AVX2-NEXT: pushl %esi ; X86AVX2-NEXT: andl $-32, %esp ; X86AVX2-NEXT: subl $96, %esp -; X86AVX2-NEXT: movl 8(%ebp), %eax -; X86AVX2-NEXT: movl 12(%ebp), %ecx -; X86AVX2-NEXT: movl 16(%ebp), %edx +; X86AVX2-NEXT: movl 8(%ebp), %edx +; X86AVX2-NEXT: movl 12(%ebp), %eax +; X86AVX2-NEXT: movl 16(%ebp), %ecx ; X86AVX2-NEXT: vmovaps %ymm0, (%esp) -; X86AVX2-NEXT: leal (%edx,%edx), %esi +; X86AVX2-NEXT: addl %ecx, %ecx +; X86AVX2-NEXT: movl %ecx, %esi ; X86AVX2-NEXT: andl $7, %esi -; X86AVX2-NEXT: movl %eax, (%esp,%esi,4) +; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) ; X86AVX2-NEXT: vmovaps (%esp), %ymm0 ; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%edx,%edx), %eax -; X86AVX2-NEXT: andl $7, %eax -; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4) +; X86AVX2-NEXT: incl %ecx +; X86AVX2-NEXT: andl $7, %ecx +; X86AVX2-NEXT: movl %eax, 32(%esp,%ecx,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0 ; X86AVX2-NEXT: leal -4(%ebp), %esp ; X86AVX2-NEXT: popl %esi @@ -2128,12 +2131,13 @@ define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind { ; X86AVX2-NEXT: movl (%ecx), %edx ; X86AVX2-NEXT: movl 4(%ecx), %ecx ; X86AVX2-NEXT: vmovaps %ymm0, (%esp) -; X86AVX2-NEXT: leal (%eax,%eax), %esi +; X86AVX2-NEXT: addl %eax, %eax +; X86AVX2-NEXT: movl %eax, %esi ; X86AVX2-NEXT: andl $7, %esi ; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) ; X86AVX2-NEXT: vmovaps (%esp), %ymm0 ; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%eax,%eax), %eax +; X86AVX2-NEXT: incl %eax ; X86AVX2-NEXT: andl $7, %eax ; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0