From 2a98dca7905eaf3e5c70f13e7ba3506ae13f92f9 Mon Sep 17 00:00:00 2001
From: Folkert de Vries
Date: Mon, 17 Nov 2025 16:01:02 +0100
Subject: [PATCH] correct signedness of pmadd arguments

---
 crates/core_arch/src/x86/avx2.rs     | 4 ++--
 crates/core_arch/src/x86/avx512bw.rs | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs
index e8213615a2..8be302cabc 100644
--- a/crates/core_arch/src/x86/avx2.rs
+++ b/crates/core_arch/src/x86/avx2.rs
@@ -1773,7 +1773,7 @@ pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpmaddubsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
+    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
 }
 
 /// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
@@ -3702,7 +3702,7 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx2.phsub.sw"]
     fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
     #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
-    fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
+    fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
     #[link_name = "llvm.x86.avx2.mpsadbw"]
     fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
     #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs
index aee705fb46..0e2dd3ad40 100644
--- a/crates/core_arch/src/x86/avx512bw.rs
+++ b/crates/core_arch/src/x86/avx512bw.rs
@@ -5955,7 +5955,7 @@ pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaddubsw))]
 pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) }
+    unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
 }
 
 /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -11688,7 +11688,7 @@ unsafe extern "C" {
     fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
 
     #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
-    fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;
+    fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
 
     #[link_name = "llvm.x86.avx512.packssdw.512"]
     fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;