diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 2601f96696ac2..1b941e84b241c 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -792,7 +792,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
           Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
-                    [IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
+                    [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 }
 
 // Test instruction with bitwise comparison.
@@ -1779,7 +1779,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                                   llvm_v32i8_ty], [IntrNoMem]>;
   def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
       Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
-                llvm_i8_ty], [IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
+                llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 29b32e7beb167..ca67da6de8c27 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -731,18 +731,33 @@ define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
 }
 declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
 
-; FIXME: We shouldn't commute this operation to fold the load.
+; We shouldn't commute this operation to fold the load.
 define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(<32 x i8>* %ptr, <32 x i8> %a1) {
-; X86-LABEL: test_x86_avx2_mpsadbw_load_op0:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmpsadbw $7, (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0x00,0x07]
-; X86-NEXT:    retl # encoding: [0xc3]
+; X86-AVX-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX-NEXT:    vmovdqa (%eax), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x08]
+; X86-AVX-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
-; X64-LABEL: test_x86_avx2_mpsadbw_load_op0:
-; X64:       # %bb.0:
-; X64-NEXT:    vmpsadbw $7, (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0x07,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
+; X86-AVX512VL-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X86-AVX512VL:       # %bb.0:
+; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX512VL-NEXT:    vmovdqa (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
+; X86-AVX512VL-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
+;
+; X64-AVX-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovdqa (%rdi), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x0f]
+; X64-AVX-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X64-AVX-NEXT:    retq # encoding: [0xc3]
+;
+; X64-AVX512VL-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X64-AVX512VL:       # %bb.0:
+; X64-AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x0f]
+; X64-AVX512VL-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %a0 = load <32 x i8>, <32 x i8>* %ptr
   %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
   ret <16 x i16> %res
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
index 66e7836f3fb6c..3f7392222db8b 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -116,39 +116,47 @@ define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
 }
 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
 
-; FIXME: We shouldn't commute this operation to fold the load.
+; We shouldn't commute this operation to fold the load.
 define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(<16 x i8>* %ptr, <16 x i8> %a1) {
 ; X86-SSE-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X86-SSE:       ## %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT:    mpsadbw $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0x00,0x07]
+; X86-SSE-NEXT:    movdqa (%eax), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x08]
+; X86-SSE-NEXT:    mpsadbw $7, %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x3a,0x42,0xc8,0x07]
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1]
 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX1-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X86-AVX1:       ## %bb.0:
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT:    vmpsadbw $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x00,0x07]
+; X86-AVX1-NEXT:    vmovdqa (%eax), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x08]
+; X86-AVX1-NEXT:    vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX512-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X86-AVX512:       ## %bb.0:
 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT:    vmpsadbw $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x00,0x07]
+; X86-AVX512-NEXT:    vmovdqa (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x08]
+; X86-AVX512-NEXT:    vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
 ; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X64-SSE:       ## %bb.0:
-; X64-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0x07,0x07]
+; X64-SSE-NEXT:    movdqa (%rdi), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x0f]
+; X64-SSE-NEXT:    mpsadbw $7, %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x3a,0x42,0xc8,0x07]
+; X64-SSE-NEXT:    movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1]
 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X64-AVX1:       ## %bb.0:
-; X64-AVX1-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x07,0x07]
+; X64-AVX1-NEXT:    vmovdqa (%rdi), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x0f]
+; X64-AVX1-NEXT:    vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX512-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X64-AVX512:       ## %bb.0:
-; X64-AVX512-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x07,0x07]
+; X64-AVX512-NEXT:    vmovdqa (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x0f]
+; X64-AVX512-NEXT:    vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
 ; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
   %a0 = load <16 x i8>, <16 x i8>* %ptr
   %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
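Note on why the Commutative flag is dropped and why the checks above no longer fold the op0 load: (v)mpsadbw only accepts a memory operand in its last source position, so folding a load of the first operand requires commuting the operands. But the immediate gives the two sources asymmetric roles (an 11-byte sliding window taken from the first source, a fixed 4-byte block taken from the second), so the operation is not commutative and commuting to fold the load changes the result. The updated checks instead materialize the first operand with (v)movdqa and keep the operand order. The standalone C sketch below (illustrative only, not part of this patch; file name and test data are made up) uses the documented _mm_mpsadbw_epu8 intrinsic to show that swapping the operands changes the result.

// mpsadbw_commute.c -- illustrative sketch (not part of this patch).
// Build: clang -msse4.1 mpsadbw_commute.c && ./a.out
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    // First operand: bytes 0..15; second operand: all zeros.
    unsigned char xb[16], yb[16] = {0};
    for (int i = 0; i < 16; ++i)
        xb[i] = (unsigned char)i;

    __m128i x = _mm_loadu_si128((const __m128i *)xb);
    __m128i y = _mm_loadu_si128((const __m128i *)yb);

    // imm8 = 7: the sliding 11-byte window comes from the FIRST operand at
    // byte offset 4 (imm8[2]), the fixed 4-byte block comes from the SECOND
    // operand at byte offset 12 (imm8[1:0]). Swapping the operands swaps
    // which roles x and y play, so the two results differ in general.
    __m128i xy = _mm_mpsadbw_epu8(x, y, 7);
    __m128i yx = _mm_mpsadbw_epu8(y, x, 7);

    unsigned short rxy[8], ryx[8];
    _mm_storeu_si128((__m128i *)rxy, xy);
    _mm_storeu_si128((__m128i *)ryx, yx);

    // For these inputs: rxy[0] == 22 (|4-0|+|5-0|+|6-0|+|7-0|), while
    // ryx[0] == 54 (|0-12|+|0-13|+|0-14|+|0-15|).
    printf("mpsadbw(x,y,7)[0] = %u, mpsadbw(y,x,7)[0] = %u\n", rxy[0], ryx[0]);
    return 0;
}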