diff --git a/Cargo.lock b/Cargo.lock
index 476f5578187b4..7f91d12a419c1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2136,6 +2136,16 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "libloading"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161"
+dependencies = [
+ "cfg-if",
+ "windows-sys 0.48.0",
+]
+
 [[package]]
 name = "libm"
 version = "0.1.4"
@@ -2411,7 +2421,7 @@ dependencies = [
  "lazy_static",
  "libc",
  "libffi",
- "libloading",
+ "libloading 0.8.1",
  "log",
  "measureme",
  "rand",
@@ -3914,7 +3924,7 @@ dependencies = [
 name = "rustc_interface"
 version = "0.0.0"
 dependencies = [
- "libloading",
+ "libloading 0.7.4",
  "rustc-rayon",
  "rustc-rayon-core",
  "rustc_ast",
@@ -4045,7 +4055,7 @@ name = "rustc_metadata"
 version = "0.0.0"
 dependencies = [
  "bitflags 1.3.2",
- "libloading",
+ "libloading 0.7.4",
  "odht",
  "rustc_ast",
  "rustc_attr",
@@ -4260,7 +4270,7 @@ dependencies = [
 name = "rustc_plugin_impl"
 version = "0.0.0"
 dependencies = [
- "libloading",
+ "libloading 0.7.4",
  "rustc_ast",
  "rustc_errors",
  "rustc_fluent_macro",
diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock
index d236e4bbcb401..3f11e4db327e6 100644
--- a/src/tools/miri/Cargo.lock
+++ b/src/tools/miri/Cargo.lock
@@ -409,12 +409,12 @@ dependencies = [
 
 [[package]]
 name = "libloading"
-version = "0.7.4"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
+checksum = "d580318f95776505201b28cf98eb1fa5e4be3b689633ba6a3e6cd880ff22d8cb"
 dependencies = [
  "cfg-if",
- "winapi",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -947,9 +947,9 @@ dependencies = [
 
 [[package]]
 name = "ui_test"
-version = "0.21.1"
+version = "0.21.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "accffe020b57a6dd50014d457b5842c5a2ca73cd84f07d86d0a19c460a6509ae"
+checksum = "aaf4bf7c184b8dfc7a4d3b90df789b1eb992ee42811cd115f32a7a1eb781058d"
 dependencies = [
  "annotate-snippets",
  "anyhow",
diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml
index 0df2c160e2f23..c911a153c1373 100644
--- a/src/tools/miri/Cargo.toml
+++ b/src/tools/miri/Cargo.toml
@@ -32,7 +32,7 @@ libc = "0.2"
 
 [target.'cfg(target_os = "linux")'.dependencies]
 libffi = "3.2.0"
-libloading = "0.7"
+libloading = "0.8"
 
 [dev-dependencies]
 colored = "2"
diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 07dd52ce94159..b60de8344d948 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-2ba4eb2d49e774b5fbc2a06258ac7b0f60b92b7e
+bb6c66be3793ac5c738eeac91ecdc4b99388d0b4
diff --git a/src/tools/miri/src/shims/intrinsics/mod.rs b/src/tools/miri/src/shims/intrinsics/mod.rs
index 063f74f989d02..8c90ceba1e4aa 100644
--- a/src/tools/miri/src/shims/intrinsics/mod.rs
+++ b/src/tools/miri/src/shims/intrinsics/mod.rs
@@ -325,7 +325,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
 
             "fmaf32" => {
                 let [a, b, c] = check_arg_count(args)?;
-                // FIXME: Using host floats, to work around https://github.com/rust-lang/miri/issues/2468.
+                // FIXME: Using host floats, to work around https://github.com/rust-lang/rustc_apfloat/issues/11
                 let a = f32::from_bits(this.read_scalar(a)?.to_u32()?);
                 let b = f32::from_bits(this.read_scalar(b)?.to_u32()?);
                 let c = f32::from_bits(this.read_scalar(c)?.to_u32()?);
@@ -335,7 +335,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
 
             "fmaf64" => {
                 let [a, b, c] = check_arg_count(args)?;
-                // FIXME: Using host floats, to work around https://github.com/rust-lang/miri/issues/2468.
+                // FIXME: Using host floats, to work around https://github.com/rust-lang/rustc_apfloat/issues/11
                 let a = f64::from_bits(this.read_scalar(a)?.to_u64()?);
                 let b = f64::from_bits(this.read_scalar(b)?.to_u64()?);
                 let c = f64::from_bits(this.read_scalar(c)?.to_u64()?);
diff --git a/src/tools/miri/src/shims/intrinsics/simd.rs b/src/tools/miri/src/shims/intrinsics/simd.rs
index 5e488679b81d6..200f37efa27b1 100644
--- a/src/tools/miri/src/shims/intrinsics/simd.rs
+++ b/src/tools/miri/src/shims/intrinsics/simd.rs
@@ -495,15 +495,21 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 let (right, right_len) = this.operand_to_simd(right)?;
                 let (dest, dest_len) = this.place_to_simd(dest)?;
 
-                let index = generic_args[2].expect_const().eval(*this.tcx, this.param_env(), Some(this.tcx.span)).unwrap().unwrap_branch();
+                let index = generic_args[2]
+                    .expect_const()
+                    .eval(*this.tcx, this.param_env(), Some(this.tcx.span))
+                    .unwrap()
+                    .unwrap_branch();
                 let index_len = index.len();
 
                 assert_eq!(left_len, right_len);
                 assert_eq!(index_len as u64, dest_len);
 
                 for i in 0..dest_len {
-                    let src_index: u64 = index[i as usize].unwrap_leaf()
-                        .try_to_u32().unwrap()
+                    let src_index: u64 = index[usize::try_from(i).unwrap()]
+                        .unwrap_leaf()
+                        .try_to_u32()
+                        .unwrap()
                         .into();
                     let dest = this.project_index(&dest, i)?;
 
diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs
index fbfe00e03dbcb..7c280109cb011 100644
--- a/src/tools/miri/src/shims/x86/mod.rs
+++ b/src/tools/miri/src/shims/x86/mod.rs
@@ -9,6 +9,8 @@ use shims::foreign_items::EmulateByNameResult;
 
 mod sse;
 mod sse2;
+mod sse3;
+mod ssse3;
 
 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
 pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
@@ -88,6 +90,16 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                     this, link_name, abi, args, dest,
                 );
             }
+            name if name.starts_with("sse3.") => {
+                return sse3::EvalContextExt::emulate_x86_sse3_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
+            }
+            name if name.starts_with("ssse3.") => {
+                return ssse3::EvalContextExt::emulate_x86_ssse3_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
+            }
             _ => return Ok(EmulateByNameResult::NotSupported),
         }
         Ok(EmulateByNameResult::NeedsJumping)
@@ -286,3 +298,44 @@ fn bin_op_simd_float_all<'tcx, F: rustc_apfloat::Float>(
 
     Ok(())
 }
+
+/// Horizontally performs the `which` operation on adjacent values of the `left` and `right`
+/// SIMD vectors and stores the results in `dest`; `saturating` selects saturating arithmetic.
+fn horizontal_bin_op<'tcx>(
+    this: &mut crate::MiriInterpCx<'_, 'tcx>,
+    which: mir::BinOp,
+    saturating: bool,
+    left: &OpTy<'tcx, Provenance>,
+    right: &OpTy<'tcx, Provenance>,
+    dest: &PlaceTy<'tcx, Provenance>,
+) -> InterpResult<'tcx, ()> {
+    let (left, left_len) = this.operand_to_simd(left)?;
+    let (right, right_len) = this.operand_to_simd(right)?;
+    let (dest, dest_len) = this.place_to_simd(dest)?;
+
+    assert_eq!(dest_len, left_len);
+    assert_eq!(dest_len, right_len);
+    assert_eq!(dest_len % 2, 0);
+
+    let middle = dest_len / 2;
+    for i in 0..dest_len {
+        // `i` is the index in `dest` (first half from `left`, second half from `right`)
+        // `j` is the index of the 2-item chunk in `src`
+        let (j, src) =
+            if i < middle { (i, &left) } else { (i.checked_sub(middle).unwrap(), &right) };
+        // `base_i` is the index of the first item of the 2-item chunk in `src`
+        let base_i = j.checked_mul(2).unwrap();
+        let lhs = this.read_immediate(&this.project_index(src, base_i)?)?;
+        let rhs = this.read_immediate(&this.project_index(src, base_i.checked_add(1).unwrap())?)?;
+
+        let res = if saturating {
+            Immediate::from(this.saturating_arith(which, &lhs, &rhs)?)
+        } else {
+            *this.wrapping_binary_op(which, &lhs, &rhs)?
+        };
+
+        this.write_immediate(res, &this.project_index(&dest, i)?)?;
+    }
+
+    Ok(())
+}
diff --git a/src/tools/miri/src/shims/x86/sse2.rs b/src/tools/miri/src/shims/x86/sse2.rs
index 2ca882167bf4b..2ef6a9b59ede7 100644
--- a/src/tools/miri/src/shims/x86/sse2.rs
+++ b/src/tools/miri/src/shims/x86/sse2.rs
@@ -82,6 +82,42 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                     this.write_immediate(*res, &dest)?;
                 }
             }
+            // Used to implement the _mm_madd_epi16 function.
+            // Multiplies packed signed 16-bit integers in `left` and `right`, producing
+            // intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
+            // intermediate 32-bit integers, and packs the results in `dest`.
+            "pmadd.wd" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(left_len, right_len);
+                assert_eq!(dest_len.checked_mul(2).unwrap(), left_len);
+
+                for i in 0..dest_len {
+                    let j1 = i.checked_mul(2).unwrap();
+                    let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
+                    let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
+
+                    let j2 = j1.checked_add(1).unwrap();
+                    let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
+                    let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
+
+                    let dest = this.project_index(&dest, i)?;
+
+                    // Multiplications are i16*i16->i32, which will not overflow.
+                    let mul1 = i32::from(left1).checked_mul(right1.into()).unwrap();
+                    let mul2 = i32::from(left2).checked_mul(right2.into()).unwrap();
+                    // However, this addition can overflow in the most extreme case
+                    // (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
+                    let res = mul1.wrapping_add(mul2);
+
+                    this.write_scalar(Scalar::from_i32(res), &dest)?;
+                }
+            }
             // Used to implement the _mm_mulhi_epi16 and _mm_mulhi_epu16 functions.
             "pmulh.w" | "pmulhu.w" => {
                 let [left, right] =
diff --git a/src/tools/miri/src/shims/x86/sse3.rs b/src/tools/miri/src/shims/x86/sse3.rs
new file mode 100644
index 0000000000000..f5c30a521fa8b
--- /dev/null
+++ b/src/tools/miri/src/shims/x86/sse3.rs
@@ -0,0 +1,90 @@
+use rustc_middle::mir;
+use rustc_span::Symbol;
+use rustc_target::abi::Align;
+use rustc_target::spec::abi::Abi;
+
+use super::horizontal_bin_op;
+use crate::*;
+use shims::foreign_items::EmulateByNameResult;
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
+    crate::MiriInterpCxExt<'mir, 'tcx>
+{
+    fn emulate_x86_sse3_intrinsic(
+        &mut self,
+        link_name: Symbol,
+        abi: Abi,
+        args: &[OpTy<'tcx, Provenance>],
+        dest: &PlaceTy<'tcx, Provenance>,
+    ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
+        let this = self.eval_context_mut();
+        // Prefix should have already been checked.
+        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse3.").unwrap();
+
+        match unprefixed_name {
+            // Used to implement the _mm_addsub_ps and _mm_addsub_pd functions.
+            // Alternately subtracts and adds the floating point (f32 or f64)
+            // elements of `left` and `right`: `left - right`, then `left + right`.
+            "addsub.ps" | "addsub.pd" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, left_len);
+                assert_eq!(dest_len, right_len);
+
+                for i in 0..dest_len {
+                    let left = this.read_immediate(&this.project_index(&left, i)?)?;
+                    let right = this.read_immediate(&this.project_index(&right, i)?)?;
+                    let dest = this.project_index(&dest, i)?;
+
+                    // Even elements are subtracted and odd elements are added.
+                    let op = if i % 2 == 0 { mir::BinOp::Sub } else { mir::BinOp::Add };
+                    let res = this.wrapping_binary_op(op, &left, &right)?;
+
+                    this.write_immediate(*res, &dest)?;
+                }
+            }
+            // Used to implement the _mm_h{add,sub}_p{s,d} functions.
+            // Horizontally add/subtract adjacent floating point values
+            // in `left` and `right`.
+            "hadd.ps" | "hadd.pd" | "hsub.ps" | "hsub.pd" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let which = match unprefixed_name {
+                    "hadd.ps" | "hadd.pd" => mir::BinOp::Add,
+                    "hsub.ps" | "hsub.pd" => mir::BinOp::Sub,
+                    _ => unreachable!(),
+                };
+
+                horizontal_bin_op(this, which, /*saturating*/ false, left, right, dest)?;
+            }
+            // Used to implement the _mm_lddqu_si128 function.
+            // Reads a 128-bit vector from an unaligned pointer. This intrinsic
+            // is expected to perform better than a regular unaligned read when
+            // the data crosses a cache line, but for Miri this is just a regular
+            // unaligned read.
+            "ldu.dq" => {
+                let [src_ptr] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+                let src_ptr = this.read_pointer(src_ptr)?;
+                let dest = dest.force_mplace(this)?;
+
+                this.mem_copy(
+                    src_ptr,
+                    Align::ONE,
+                    dest.ptr(),
+                    Align::ONE,
+                    dest.layout.size,
+                    /*nonoverlapping*/ true,
+                )?;
+            }
+            _ => return Ok(EmulateByNameResult::NotSupported),
+        }
+        Ok(EmulateByNameResult::NeedsJumping)
+    }
+}
diff --git a/src/tools/miri/src/shims/x86/ssse3.rs b/src/tools/miri/src/shims/x86/ssse3.rs
new file mode 100644
index 0000000000000..b01546722346b
--- /dev/null
+++ b/src/tools/miri/src/shims/x86/ssse3.rs
@@ -0,0 +1,199 @@
+use rustc_middle::mir;
+use rustc_span::Symbol;
+use rustc_target::spec::abi::Abi;
+
+use super::horizontal_bin_op;
+use crate::*;
+use shims::foreign_items::EmulateByNameResult;
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
+    crate::MiriInterpCxExt<'mir, 'tcx>
+{
+    fn emulate_x86_ssse3_intrinsic(
+        &mut self,
+        link_name: Symbol,
+        abi: Abi,
+        args: &[OpTy<'tcx, Provenance>],
+        dest: &PlaceTy<'tcx, Provenance>,
+    ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
+        let this = self.eval_context_mut();
+        // Prefix should have already been checked.
+        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.ssse3.").unwrap();
+
+        match unprefixed_name {
+            // Used to implement the _mm_abs_epi{8,16,32} functions.
+            // Calculates the absolute value of packed 8/16/32-bit integers.
+            "pabs.b.128" | "pabs.w.128" | "pabs.d.128" => {
+                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (op, op_len) = this.operand_to_simd(op)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(op_len, dest_len);
+
+                for i in 0..dest_len {
+                    let op = this.read_scalar(&this.project_index(&op, i)?)?;
+                    let dest = this.project_index(&dest, i)?;
+
+                    // Converting to a host "i128" works since the input is always signed.
+                    let res = op.to_int(dest.layout.size)?.unsigned_abs();
+
+                    this.write_scalar(Scalar::from_uint(res, dest.layout.size), &dest)?;
+                }
+            }
+            // Used to implement the _mm_shuffle_epi8 function.
+            // Shuffles bytes from `left` using `right` as pattern.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8
+            "pshuf.b.128" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, left_len);
+                assert_eq!(dest_len, right_len);
+
+                for i in 0..dest_len {
+                    let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u8()?;
+                    let dest = this.project_index(&dest, i)?;
+
+                    let res = if right & 0x80 == 0 {
+                        let j = right % 16; // index wraps around
+                        this.read_scalar(&this.project_index(&left, j.into())?)?
+                    } else {
+                        // If the highest bit in `right` is 1, write zero.
+                        Scalar::from_u8(0)
+                    };
+
+                    this.write_scalar(res, &dest)?;
+                }
+            }
+            // Used to implement the _mm_h{add,adds,sub}_epi{16,32} functions.
+            // Horizontally add / add with saturation / subtract adjacent 16/32-bit
+            // integer values in `left` and `right`.
+            "phadd.w.128" | "phadd.sw.128" | "phadd.d.128" | "phsub.w.128" | "phsub.sw.128"
+            | "phsub.d.128" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (which, saturating) = match unprefixed_name {
+                    "phadd.w.128" | "phadd.d.128" => (mir::BinOp::Add, false),
+                    "phadd.sw.128" => (mir::BinOp::Add, true),
+                    "phsub.w.128" | "phsub.d.128" => (mir::BinOp::Sub, false),
+                    "phsub.sw.128" => (mir::BinOp::Sub, true),
+                    _ => unreachable!(),
+                };
+
+                horizontal_bin_op(this, which, saturating, left, right, dest)?;
+            }
+            // Used to implement the _mm_maddubs_epi16 function.
+            // Multiplies packed 8-bit unsigned integers from `left` and packed
+            // signed 8-bit integers from `right` into 16-bit signed integers. Then,
+            // the saturating sum of the products with indices `2*i` and `2*i+1`
+            // produces the output at index `i`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16
+            "pmadd.ub.sw.128" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(left_len, right_len);
+                assert_eq!(dest_len.checked_mul(2).unwrap(), left_len);
+
+                for i in 0..dest_len {
+                    let j1 = i.checked_mul(2).unwrap();
+                    let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_u8()?;
+                    let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i8()?;
+
+                    let j2 = j1.checked_add(1).unwrap();
+                    let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_u8()?;
+                    let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i8()?;
+
+                    let dest = this.project_index(&dest, i)?;
+
+                    // Multiplication of a u8 and an i8 into an i16 cannot overflow.
+                    let mul1 = i16::from(left1).checked_mul(right1.into()).unwrap();
+                    let mul2 = i16::from(left2).checked_mul(right2.into()).unwrap();
+                    let res = mul1.saturating_add(mul2);
+
+                    this.write_scalar(Scalar::from_i16(res), &dest)?;
+                }
+            }
+            // Used to implement the _mm_mulhrs_epi16 function.
+            // Multiplies packed 16-bit signed integer values, truncates the 32-bit
+            // product to the 18 most significant bits by right-shifting, and then
+            // divides the 18-bit value by 2 (rounding to nearest) by first adding
+            // 1 and then taking the bits `1..=16`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16
+            "pmul.hr.sw.128" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, left_len);
+                assert_eq!(dest_len, right_len);
+
+                for i in 0..dest_len {
+                    let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?;
+                    let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?;
+                    let dest = this.project_index(&dest, i)?;
+
+                    let res = (i32::from(left).checked_mul(right.into()).unwrap() >> 14)
+                        .checked_add(1)
+                        .unwrap()
+                        >> 1;
+
+                    // The result of this operation can overflow a signed 16-bit integer.
+                    // When `left` and `right` are -0x8000, the result is 0x8000.
+                    #[allow(clippy::cast_possible_truncation)]
+                    let res = res as i16;
+
+                    this.write_scalar(Scalar::from_i16(res), &dest)?;
+                }
+            }
+            // Used to implement the _mm_sign_epi{8,16,32} functions.
+            // Negates elements from `left` when the corresponding element in
+            // `right` is negative. If an element from `right` is zero, zero
+            // is written to the corresponding output element.
+            // Basically, we multiply `left` with `right.signum()`.
+            "psign.b.128" | "psign.w.128" | "psign.d.128" => {
+                let [left, right] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, left_len);
+                assert_eq!(dest_len, right_len);
+
+                for i in 0..dest_len {
+                    let dest = this.project_index(&dest, i)?;
+                    let left = this.read_immediate(&this.project_index(&left, i)?)?;
+                    let right = this
+                        .read_scalar(&this.project_index(&right, i)?)?
+                        .to_int(dest.layout.size)?;
+
+                    let res = this.wrapping_binary_op(
+                        mir::BinOp::Mul,
+                        &left,
+                        &ImmTy::from_int(right.signum(), dest.layout),
+                    )?;
+
+                    this.write_immediate(*res, &dest)?;
+                }
+            }
+            _ => return Ok(EmulateByNameResult::NotSupported),
+        }
+        Ok(EmulateByNameResult::NeedsJumping)
+    }
+}
diff --git a/src/tools/miri/tests/pass/float.rs b/src/tools/miri/tests/pass/float.rs
index fee5ca44ffb34..70c64485fe937 100644
--- a/src/tools/miri/tests/pass/float.rs
+++ b/src/tools/miri/tests/pass/float.rs
@@ -168,6 +168,16 @@ fn basic() {
     let x: u32 = unsafe { std::mem::transmute(42.0_f32) };
     let y: f32 = unsafe { std::mem::transmute(x) };
     assert_eq(y, 42.0_f32);
+
+    // `%` sign behavior, some of this used to be buggy
+    assert!((black_box(1.0f32) % 1.0).is_sign_positive());
+    assert!((black_box(1.0f32) % -1.0).is_sign_positive());
+    assert!((black_box(-1.0f32) % 1.0).is_sign_negative());
+    assert!((black_box(-1.0f32) % -1.0).is_sign_negative());
+    assert!((black_box(1.0f64) % 1.0).is_sign_positive());
+    assert!((black_box(1.0f64) % -1.0).is_sign_positive());
+    assert!((black_box(-1.0f64) % 1.0).is_sign_negative());
+    assert!((black_box(-1.0f64) % -1.0).is_sign_negative());
 }
 
 /// Many of these test values are taken from
diff --git a/src/tools/miri/tests/pass/intrinsics-math.rs b/src/tools/miri/tests/pass/intrinsics-math.rs
index e0e4f5654d6a4..5f7730a3e86af 100644
--- a/src/tools/miri/tests/pass/intrinsics-math.rs
+++ b/src/tools/miri/tests/pass/intrinsics-math.rs
@@ -1,4 +1,5 @@
 #![feature(float_gamma)]
+use std::{f32, f64};
 
 macro_rules! assert_approx_eq {
     ($a:expr, $b:expr) => {{
@@ -15,8 +16,7 @@ fn ldexp(a: f64, b: i32) -> f64 {
 }
 
 pub fn main() {
-    use std::f32;
-    use std::f64;
+    mul_add();
 
     assert_approx_eq!(64f32.sqrt(), 8f32);
     assert_approx_eq!(64f64.sqrt(), 8f64);
@@ -48,13 +48,6 @@ pub fn main() {
     assert_approx_eq!(8f32.log2(), 3f32);
     assert_approx_eq!(f64::consts::E.log2(), f64::consts::LOG2_E);
 
-    assert_approx_eq!(3.0f32.mul_add(2.0f32, 5.0f32), 11.0);
-    assert_eq!(0.0f32.mul_add(-2.0, f32::consts::E), f32::consts::E);
-    assert_approx_eq!(3.0f64.mul_add(2.0, 5.0), 11.0);
-    assert_eq!(0.0f64.mul_add(-2.0f64, f64::consts::E), f64::consts::E);
-    assert_eq!((-3.2f32).mul_add(2.4, f32::NEG_INFINITY), f32::NEG_INFINITY);
-    assert_eq!((-3.2f64).mul_add(2.4, f64::NEG_INFINITY), f64::NEG_INFINITY);
-
     assert_approx_eq!((-1.0f32).abs(), 1.0f32);
     assert_approx_eq!(34.2f64.abs(), 34.2f64);
 
@@ -146,3 +139,19 @@ pub fn main() {
     assert_approx_eq!(val, (2.0 * f64::consts::PI.sqrt()).ln());
     assert_eq!(sign, -1);
 }
+
+fn mul_add() {
+    assert_approx_eq!(3.0f32.mul_add(2.0f32, 5.0f32), 11.0);
+    assert_eq!(0.0f32.mul_add(-2.0, f32::consts::E), f32::consts::E);
+    assert_approx_eq!(3.0f64.mul_add(2.0, 5.0), 11.0);
+    assert_eq!(0.0f64.mul_add(-2.0f64, f64::consts::E), f64::consts::E);
+    assert_eq!((-3.2f32).mul_add(2.4, f32::NEG_INFINITY), f32::NEG_INFINITY);
+    assert_eq!((-3.2f64).mul_add(2.4, f64::NEG_INFINITY), f64::NEG_INFINITY);
+
+    let f = f32::mul_add(
+        -0.000000000000000000000000000000000000014728589,
+        0.0000037105144,
+        0.000000000000000000000000000000000000000000055,
+    );
+    assert_eq!(f.to_bits(), f32::to_bits(-0.0)); // compare bits: `-0.0 == 0.0` as floats
+}
diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
index fa9df04d36843..2c7665bc73631 100644
--- a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
+++ b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
@@ -70,6 +70,24 @@ mod tests {
         }
         test_mm_avg_epu16();
 
+        #[target_feature(enable = "sse2")]
+        unsafe fn test_mm_madd_epi16() {
+            let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+            let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
+            let r = _mm_madd_epi16(a, b);
+            let e = _mm_setr_epi32(29, 81, 149, 233);
+            assert_eq_m128i(r, e);
+
+            let a =
+                _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MIN, i16::MAX, 0, 0);
+            let b =
+                _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MAX, i16::MIN, 0, 0);
+            let r = _mm_madd_epi16(a, b);
+            let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
+            assert_eq_m128i(r, e);
+        }
+        test_mm_madd_epi16();
+
         #[target_feature(enable = "sse2")]
         unsafe fn test_mm_mulhi_epi16() {
             let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse3-ssse3.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse3-ssse3.rs
new file mode 100644
index 0000000000000..0805d9bc30039
--- /dev/null
+++ b/src/tools/miri/tests/pass/intrinsics-x86-sse3-ssse3.rs
@@ -0,0 +1,395 @@
+// Ignore everything except x86 and x86_64
+// Any additional targets added to CI should be ignored here
+// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
+//@ignore-target-aarch64
+//@ignore-target-arm
+//@ignore-target-avr
+//@ignore-target-s390x
+//@ignore-target-thumbv7em
+//@ignore-target-wasm32
+// SSSE3 implicitly enables SSE3
+//@compile-flags: -C target-feature=+ssse3
+
+use core::mem::transmute;
+#[cfg(target_arch = "x86")]
+use std::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
+
+fn main() {
+    // The compile flags only request SSSE3. It implies SSE3, but both are
+    // verified at runtime before the feature-gated tests are entered.
+    assert!(is_x86_feature_detected!("sse3"));
+    assert!(is_x86_feature_detected!("ssse3"));
+
+    // SAFETY: the asserts above guarantee that both target features are available.
+    unsafe { test_sse3() };
+    unsafe { test_ssse3() };
+}
+
+#[target_feature(enable = "sse3")]
+unsafe fn test_sse3() {
+    // Mostly copied from library/stdarch/crates/core_arch/src/x86/sse3.rs
+
+    #[target_feature(enable = "sse3")]
+    unsafe fn test_mm_addsub_ps() {
+        // Even lanes compute `x - y`, odd lanes compute `x + y`.
+        let x = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let y = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        assert_eq_m128(_mm_addsub_ps(x, y), _mm_setr_ps(99.0, 25.0, 0.0, -15.0));
+    }
+    test_mm_addsub_ps();
+
+    #[target_feature(enable = "sse3")]
+    unsafe fn test_mm_addsub_pd() {
+        // Lane 0 is the difference (-1 - -100), lane 1 is the sum (5 + 20).
+        let lhs = _mm_setr_pd(-1.0, 5.0);
+        let rhs = _mm_setr_pd(-100.0, 20.0);
+        assert_eq_m128d(_mm_addsub_pd(lhs, rhs), _mm_setr_pd(99.0, 25.0));
+    }
+    test_mm_addsub_pd();
+
+    #[target_feature(enable = "sse3")]
+    unsafe fn test_mm_hadd_ps() {
+        // Adjacent pairs are summed: the low two lanes come from `x`, the high two from `y`.
+        let x = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let y = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        assert_eq_m128(_mm_hadd_ps(x, y), _mm_setr_ps(4.0, -10.0, -80.0, -5.0));
+    }
+    test_mm_hadd_ps();
+
+    #[target_feature(enable = "sse3")]
+    unsafe fn test_mm_hadd_pd() {
+        // Lane 0 is the pair sum of `lhs` (-1 + 5), lane 1 the pair sum of `rhs` (-100 + 20).
+        let lhs = _mm_setr_pd(-1.0, 5.0);
+        let rhs = _mm_setr_pd(-100.0, 20.0);
+        assert_eq_m128d(_mm_hadd_pd(lhs, rhs), _mm_setr_pd(4.0, -80.0));
+    }
+    test_mm_hadd_pd();
+
+    #[target_feature(enable = "sse3")]
+    unsafe fn test_mm_hsub_ps() {
+        // Each pair yields "first minus second": lanes 0-1 from `x`, lanes 2-3 from `y`.
+        let x = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let y = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        assert_eq_m128(_mm_hsub_ps(x, y), _mm_setr_ps(-6.0, 10.0, -120.0, 5.0));
+    }
+    test_mm_hsub_ps();
+
+    #[target_feature(enable = "sse3")]
+    unsafe fn test_mm_hsub_pd() {
+        // Lane 0 is the pair difference of `lhs` (-1 - 5), lane 1 that of `rhs` (-100 - 20).
+        let lhs = _mm_setr_pd(-1.0, 5.0);
+        let rhs = _mm_setr_pd(-100.0, 20.0);
+        assert_eq_m128d(_mm_hsub_pd(lhs, rhs), _mm_setr_pd(-6.0, -120.0));
+    }
+    test_mm_hsub_pd();
+
+    #[target_feature(enable = "sse3")]
+    unsafe fn test_mm_lddqu_si128() {
+        // Loading the vector back through a pointer to it must reproduce it exactly.
+        let stored = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        assert_eq_m128i(stored, _mm_lddqu_si128(&stored));
+    }
+    test_mm_lddqu_si128();
+}
+
+#[target_feature(enable = "ssse3")]
+unsafe fn test_ssse3() {
+    // Mostly copied from library/stdarch/crates/core_arch/src/x86/ssse3.rs
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_abs_epi8() {
+        // Every i8 lane starts at -5, so every lane of the absolute value must be 5.
+        assert_eq_m128i(_mm_abs_epi8(_mm_set1_epi8(-5)), _mm_set1_epi8(5));
+    }
+    test_mm_abs_epi8();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_abs_epi16() {
+        // Every i16 lane starts at -5, so every lane of the absolute value must be 5.
+        assert_eq_m128i(_mm_abs_epi16(_mm_set1_epi16(-5)), _mm_set1_epi16(5));
+    }
+    test_mm_abs_epi16();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_abs_epi32() {
+        // Every i32 lane starts at -5, so every lane of the absolute value must be 5.
+        assert_eq_m128i(_mm_abs_epi32(_mm_set1_epi32(-5)), _mm_set1_epi32(5));
+    }
+    test_mm_abs_epi32();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_shuffle_epi8() {
+        // An index with its top bit set (128) zeroes the lane; otherwise the low 4 bits select.
+        let table = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let idx = _mm_setr_epi8(4, 128_u8 as i8, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
+        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
+        assert_eq_m128i(_mm_shuffle_epi8(table, idx), expected);
+
+        // Adding 32 to every index leaves bit 7 and the low 4 bits alone, so the
+        // out-of-range indices must wrap around to the same result.
+        let shifted = _mm_add_epi8(idx, _mm_set1_epi8(32));
+        assert_eq_m128i(_mm_shuffle_epi8(table, shifted), expected);
+    }
+    test_mm_shuffle_epi8();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_hadd_epi16() {
+        // Sums of adjacent pairs: lanes 0-3 come from `lhs`, lanes 4-7 from `rhs`.
+        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+        let rhs = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
+        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
+        assert_eq_m128i(_mm_hadd_epi16(lhs, rhs), expected);
+
+        // Overflowing sums must wrap: `i16::MAX + k` lands on `i16::MIN + (k - 1)` and
+        // `i16::MIN - k` lands on `i16::MAX - (k - 1)`.
+        let lhs = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
+        let rhs = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
+        let expected = _mm_setr_epi16(
+            i16::MIN,
+            i16::MIN + 1,
+            i16::MIN + 2,
+            i16::MIN + 3,
+            i16::MAX,
+            i16::MAX - 1,
+            i16::MAX - 2,
+            i16::MAX - 3,
+        );
+        assert_eq_m128i(_mm_hadd_epi16(lhs, rhs), expected);
+    }
+    test_mm_hadd_epi16();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_hadds_epi16() {
+        // Pair sums as usual; the last two pairs of `rhs` (32767 + 1, -32768 + -1) clamp.
+        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
+        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
+        assert_eq_m128i(_mm_hadds_epi16(lhs, rhs), expected);
+
+        // Every pair overflows here, so instead of wrapping each lane must
+        // saturate: `lhs` pairs pin to i16::MAX, `rhs` pairs pin to i16::MIN.
+        let lhs = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
+        let rhs = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
+        let expected = _mm_setr_epi16(
+            i16::MAX,
+            i16::MAX,
+            i16::MAX,
+            i16::MAX,
+            i16::MIN,
+            i16::MIN,
+            i16::MIN,
+            i16::MIN,
+        );
+        assert_eq_m128i(_mm_hadds_epi16(lhs, rhs), expected);
+    }
+    test_mm_hadds_epi16();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_hadd_epi32() {
+        // Sums of adjacent pairs: lanes 0-1 come from `lhs`, lanes 2-3 from `rhs`.
+        let lhs = _mm_setr_epi32(1, 2, 3, 4);
+        let rhs = _mm_setr_epi32(4, 128, 4, 3);
+        let expected = _mm_setr_epi32(3, 7, 132, 7);
+        assert_eq_m128i(_mm_hadd_epi32(lhs, rhs), expected);
+
+        // Overflowing sums must wrap: `i32::MAX + 1` becomes `i32::MIN`, and
+        // `i32::MIN - 1` becomes `i32::MAX`.
+        let lhs = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
+        let rhs = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
+        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
+        assert_eq_m128i(_mm_hadd_epi32(lhs, rhs), expected);
+    }
+    test_mm_hadd_epi32();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_hsub_epi16() {
+        // "First minus second" per pair: lanes 0-3 come from `lhs`, lanes 4-7 from `rhs`.
+        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+        let rhs = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
+        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
+        assert_eq_m128i(_mm_hsub_epi16(lhs, rhs), expected);
+
+        // Overflowing differences must wrap: `i16::MAX - (-k)` lands on
+        // `i16::MIN + (k - 1)` and `i16::MIN - k` lands on `i16::MAX - (k - 1)`.
+        let lhs = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
+        let rhs = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
+        let expected = _mm_setr_epi16(
+            i16::MIN,
+            i16::MIN + 1,
+            i16::MIN + 2,
+            i16::MIN + 3,
+            i16::MAX,
+            i16::MAX - 1,
+            i16::MAX - 2,
+            i16::MAX - 3,
+        );
+        assert_eq_m128i(_mm_hsub_epi16(lhs, rhs), expected);
+    }
+    test_mm_hsub_epi16();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_hsubs_epi16() {
+        // Pair differences; the last two pairs of `rhs` (32767 - -1, -32768 - 1) clamp.
+        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
+        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
+        assert_eq_m128i(_mm_hsubs_epi16(lhs, rhs), expected);
+
+        // Every pair overflows here, so instead of wrapping each lane must
+        // saturate: `lhs` pairs pin to i16::MAX, `rhs` pairs pin to i16::MIN.
+        let lhs = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
+        let rhs = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
+        let expected = _mm_setr_epi16(
+            i16::MAX,
+            i16::MAX,
+            i16::MAX,
+            i16::MAX,
+            i16::MIN,
+            i16::MIN,
+            i16::MIN,
+            i16::MIN,
+        );
+        assert_eq_m128i(_mm_hsubs_epi16(lhs, rhs), expected);
+    }
+    test_mm_hsubs_epi16();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_hsub_epi32() {
+        // "First minus second" per pair: lanes 0-1 come from `lhs`, lanes 2-3 from `rhs`.
+        let lhs = _mm_setr_epi32(1, 2, 3, 4);
+        let rhs = _mm_setr_epi32(4, 128, 4, 3);
+        let expected = _mm_setr_epi32(-1, -1, -124, 1);
+        assert_eq_m128i(_mm_hsub_epi32(lhs, rhs), expected);
+
+        // Overflowing differences must wrap: `i32::MAX - (-1)` becomes `i32::MIN`,
+        // and `i32::MIN - 1` becomes `i32::MAX`.
+        let lhs = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
+        let rhs = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
+        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
+        assert_eq_m128i(_mm_hsub_epi32(lhs, rhs), expected);
+    }
+    test_mm_hsub_epi32();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_maddubs_epi16() {
+        // Adjacent byte products are summed into i16 lanes, e.g. 1 * 4 + 2 * 63 = 130.
+        let unsigned = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let signed = _mm_setr_epi8(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
+        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
+        assert_eq_m128i(_mm_maddubs_epi16(unsigned, signed), expected);
+
+        // Bytes of the first operand are read as unsigned (255, not -1), those of the second
+        // as signed; sums that do not fit in an i16 saturate instead of wrapping.
+        let unsigned = _mm_setr_epi8(
+            u8::MAX as i8,
+            u8::MAX as i8,
+            u8::MAX as i8,
+            u8::MAX as i8,
+            u8::MAX as i8,
+            u8::MAX as i8,
+            100,
+            100,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+        );
+        let signed = _mm_setr_epi8(
+            i8::MAX,
+            i8::MAX,
+            i8::MAX,
+            i8::MIN,
+            i8::MIN,
+            i8::MIN,
+            50,
+            15,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+        );
+        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
+        assert_eq_m128i(_mm_maddubs_epi16(unsigned, signed), expected);
+    }
+    test_mm_maddubs_epi16();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_mulhrs_epi16() {
+        // Products are scaled down by 2^15 with rounding, e.g. 5 * 32767 / 32768 rounds to 5.
+        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
+        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
+        assert_eq_m128i(_mm_mulhrs_epi16(lhs, rhs), expected);
+
+        // Extreme values: MIN * MIN scales to +32768, which does not fit in an
+        // i16 and therefore shows up as i16::MIN.
+        let lhs = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
+        let rhs = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
+        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
+        assert_eq_m128i(_mm_mulhrs_epi16(lhs, rhs), expected);
+    }
+    test_mm_mulhrs_epi16();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_sign_epi8() {
+        // Lanes of `values` are negated where `signs` is negative and zeroed where it is 0.
+        let values = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -14, -15, 16);
+        let signs = _mm_setr_epi8(4, 63, -4, 3, 24, 12, -6, -19, 12, 5, -5, 10, 4, 1, -8, 0);
+        let expected = _mm_setr_epi8(1, 2, -3, 4, 5, 6, -7, -8, 9, 10, -11, 12, 13, -14, 15, 0);
+        assert_eq_m128i(_mm_sign_epi8(values, signs), expected);
+    }
+    test_mm_sign_epi8();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_sign_epi16() {
+        // Lanes of `values` are negated where `signs` is negative and zeroed where it is 0.
+        let values = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
+        let signs = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
+        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
+        assert_eq_m128i(_mm_sign_epi16(values, signs), expected);
+    }
+    test_mm_sign_epi16();
+
+    #[target_feature(enable = "ssse3")]
+    unsafe fn test_mm_sign_epi32() {
+        // Lanes of `values` are negated where `signs` is negative and zeroed where it is 0.
+        let values = _mm_setr_epi32(-1, 2, 3, 4);
+        let signs = _mm_setr_epi32(1, -1, 1, 0);
+        let expected = _mm_setr_epi32(-1, -2, 3, 0);
+        assert_eq_m128i(_mm_sign_epi32(values, signs), expected);
+    }
+    test_mm_sign_epi32();
+}
+
+/// Panics unless `a` and `b` compare equal in all four `f32` lanes.
+#[track_caller]
+#[target_feature(enable = "sse")]
+unsafe fn assert_eq_m128(a: __m128, b: __m128) {
+    if _mm_movemask_ps(_mm_cmpeq_ps(a, b)) != 0b1111 {
+        panic!("{:?} != {:?}", a, b);
+    }
+}
+
+/// Panics unless `a` and `b` compare equal in both `f64` lanes.
+#[track_caller]
+#[target_feature(enable = "sse2")]
+unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
+    let all_equal = _mm_movemask_pd(_mm_cmpeq_pd(a, b)) == 0b11;
+    assert!(all_equal, "{:?} != {:?}", a, b);
+}
+
+#[track_caller]
+#[target_feature(enable = "sse2")]
+pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
+    assert_eq!(transmute::<__m128i, [u64; 2]>(a), transmute::<__m128i, [u64; 2]>(b))
+}
diff --git a/src/tools/miri/tests/pass/portable-simd.rs b/src/tools/miri/tests/pass/portable-simd.rs
index c1f5618c16346..969162e2c1e9e 100644
--- a/src/tools/miri/tests/pass/portable-simd.rs
+++ b/src/tools/miri/tests/pass/portable-simd.rs
@@ -416,20 +416,14 @@ fn simd_intrinsics() {
             simd_select(i8x4::from_array([0, -1, -1, 0]), b, a),
             i32x4::from_array([10, 2, 10, 10])
         );
+        assert_eq!(simd_shuffle_generic::<_, i32x4, { &[3, 1, 0, 2] }>(a, b), a,);
+        assert_eq!(simd_shuffle::<_, _, i32x4>(a, b, const { [3, 1, 0, 2] }), a,);
         assert_eq!(
-            simd_shuffle_generic::<_, i32x4, {&[3, 1, 0, 2]}>(a, b),
-            a,
-        );
-        assert_eq!(
-            simd_shuffle::<_, _, i32x4>(a, b, const {[3, 1, 0, 2]}),
-            a,
-        );
-        assert_eq!(
-            simd_shuffle_generic::<_, i32x4, {&[7, 5, 4, 6]}>(a, b),
+            simd_shuffle_generic::<_, i32x4, { &[7, 5, 4, 6] }>(a, b),
             i32x4::from_array([4, 2, 1, 10]),
         );
         assert_eq!(
-            simd_shuffle::<_, _, i32x4>(a, b, const {[7, 5, 4, 6]}),
+            simd_shuffle::<_, _, i32x4>(a, b, const { [7, 5, 4, 6] }),
             i32x4::from_array([4, 2, 1, 10]),
         );
     }