diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index a545f3cecb7c2..2bcb33e6b0bca 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -372,8 +372,8 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; // FIXME: The below is closer to correct, but caused some perf regressions. //defm : SLMWriteResPair; diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll index aeccae300eea5..9a10a2353e16a 100644 --- a/llvm/test/CodeGen/X86/slow-pmulld.ll +++ b/llvm/test/CodeGen/X86/slow-pmulld.ll @@ -537,40 +537,40 @@ define <8 x i32> @test_mul_v8i32_v8i16(<8 x i16> %A) { define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) { ; SLM32-LABEL: test_mul_v16i32_v16i16: ; SLM32: # %bb.0: -; SLM32-NEXT: movdqa %xmm0, %xmm4 -; SLM32-NEXT: movdqa {{.*#+}} xmm0 = [18778,18778,18778,18778,18778,18778,18778,18778] ; SLM32-NEXT: movdqa %xmm1, %xmm3 -; SLM32-NEXT: movdqa %xmm4, %xmm2 -; SLM32-NEXT: pmullw %xmm0, %xmm4 +; SLM32-NEXT: movdqa %xmm0, %xmm1 +; SLM32-NEXT: movdqa {{.*#+}} xmm0 = [18778,18778,18778,18778,18778,18778,18778,18778] +; SLM32-NEXT: movdqa %xmm1, %xmm2 +; SLM32-NEXT: movdqa %xmm3, %xmm4 +; SLM32-NEXT: pmullw %xmm0, %xmm1 ; SLM32-NEXT: pmulhuw %xmm0, %xmm2 ; SLM32-NEXT: pmullw %xmm0, %xmm3 -; SLM32-NEXT: pmulhuw %xmm0, %xmm1 -; SLM32-NEXT: movdqa %xmm4, %xmm0 -; SLM32-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] +; SLM32-NEXT: pmulhuw %xmm0, %xmm4 +; SLM32-NEXT: movdqa %xmm1, %xmm0 +; SLM32-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; SLM32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; SLM32-NEXT: movdqa %xmm3, %xmm2 -; SLM32-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SLM32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SLM32-NEXT: movdqa %xmm4, %xmm1 +; SLM32-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] +; SLM32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] ; SLM32-NEXT: retl ; ; SLM64-LABEL: test_mul_v16i32_v16i16: ; SLM64: # %bb.0: -; SLM64-NEXT: movdqa %xmm0, %xmm4 -; SLM64-NEXT: movdqa {{.*#+}} xmm0 = [18778,18778,18778,18778,18778,18778,18778,18778] ; SLM64-NEXT: movdqa %xmm1, %xmm3 -; SLM64-NEXT: movdqa %xmm4, %xmm2 -; SLM64-NEXT: pmullw %xmm0, %xmm4 +; SLM64-NEXT: movdqa %xmm0, %xmm1 +; SLM64-NEXT: movdqa {{.*#+}} xmm0 = [18778,18778,18778,18778,18778,18778,18778,18778] +; SLM64-NEXT: movdqa %xmm1, %xmm2 +; SLM64-NEXT: movdqa %xmm3, %xmm4 +; SLM64-NEXT: pmullw %xmm0, %xmm1 ; SLM64-NEXT: pmulhuw %xmm0, %xmm2 ; SLM64-NEXT: pmullw %xmm0, %xmm3 -; SLM64-NEXT: pmulhuw %xmm0, %xmm1 -; SLM64-NEXT: movdqa %xmm4, %xmm0 -; SLM64-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] +; SLM64-NEXT: pmulhuw %xmm0, %xmm4 +; SLM64-NEXT: movdqa %xmm1, %xmm0 +; SLM64-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; SLM64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; SLM64-NEXT: movdqa %xmm3, %xmm2 -; SLM64-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SLM64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SLM64-NEXT: movdqa %xmm4, %xmm1 +; SLM64-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] +; SLM64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] ; SLM64-NEXT: retq ; ; SLOW32-LABEL: test_mul_v16i32_v16i16: diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s index 8c5fff166cab4..dfdfa1320a2a5 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s @@ -563,8 +563,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pextrw $1, %xmm0, %ecx # CHECK-NEXT: 1 1 1.00 pinsrw $1, %eax, %xmm0 # CHECK-NEXT: 1 4 1.00 * pinsrw $1, (%rax), %xmm0 -# CHECK-NEXT: 2 5 2.00 pmaddwd %xmm0, %xmm2 -# CHECK-NEXT: 2 8 2.00 * pmaddwd (%rax), %xmm2 +# CHECK-NEXT: 1 5 2.00 pmaddwd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 2.00 * pmaddwd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * pmaxsw (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pmaxub %xmm0, %xmm2 @@ -574,16 +574,16 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * pminub (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmovmskb %xmm0, %ecx -# CHECK-NEXT: 2 5 2.00 pmulhuw %xmm0, %xmm2 -# CHECK-NEXT: 2 8 2.00 * pmulhuw (%rax), %xmm2 -# CHECK-NEXT: 2 5 2.00 pmulhw %xmm0, %xmm2 -# CHECK-NEXT: 2 8 2.00 * pmulhw (%rax), %xmm2 -# CHECK-NEXT: 2 5 2.00 pmullw %xmm0, %xmm2 -# CHECK-NEXT: 2 8 2.00 * pmullw (%rax), %xmm2 +# CHECK-NEXT: 1 5 2.00 pmulhuw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 2.00 * pmulhuw (%rax), %xmm2 +# CHECK-NEXT: 1 5 2.00 pmulhw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 2.00 * pmulhw (%rax), %xmm2 +# CHECK-NEXT: 1 5 2.00 pmullw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 2.00 * pmullw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmuludq %mm0, %mm2 # CHECK-NEXT: 1 7 1.00 * pmuludq (%rax), %mm2 -# CHECK-NEXT: 2 5 2.00 pmuludq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 2.00 * pmuludq (%rax), %xmm2 +# CHECK-NEXT: 1 5 2.00 pmuludq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 2.00 * pmuludq (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 por %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * por (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 psadbw %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s index 2bcebead6181a..e0e19e681853b 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s @@ -237,8 +237,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 * pmovzxwd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovzxwq %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * pmovzxwq (%rax), %xmm2 -# CHECK-NEXT: 2 5 2.00 pmuldq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 2.00 * pmuldq (%rax), %xmm2 +# CHECK-NEXT: 1 5 2.00 pmuldq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 2.00 * pmuldq (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * pmulld (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 ptest %xmm0, %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s index f6c1bfe3bae8d..3fb48787d929f 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s @@ -148,12 +148,12 @@ psignw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 * phsubw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2 # CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %mm2 -# CHECK-NEXT: 2 5 2.00 pmaddubsw %xmm0, %xmm2 -# CHECK-NEXT: 2 8 2.00 * pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: 1 5 2.00 pmaddubsw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 2.00 * pmaddubsw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmulhrsw %mm0, %mm2 # CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %mm2 -# CHECK-NEXT: 2 5 2.00 pmulhrsw %xmm0, %xmm2 -# CHECK-NEXT: 2 8 2.00 * pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: 1 5 2.00 pmulhrsw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 2.00 * pmulhrsw (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2 # CHECK-NEXT: 1 4 1.00 * pshufb (%rax), %mm2 # CHECK-NEXT: 4 5 5.00 pshufb %xmm0, %xmm2