Skip to content

Commit

Permalink
Auto merge of rust-lang#118077 - calebzulawski:sync-portable-simd-202…
Browse files Browse the repository at this point in the history
…3-11-19, r=workingjubilee

Portable SIMD subtree update

Syncs nightly to the latest changes from rust-lang/portable-simd

r? `@rust-lang/libs`
  • Loading branch information
bors committed Dec 2, 2023
2 parents 0908f17 + bcb1c41 commit 3f1e30a
Show file tree
Hide file tree
Showing 112 changed files with 2,289 additions and 1,190 deletions.
3 changes: 2 additions & 1 deletion library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,9 +446,10 @@ pub mod arch;
#[unstable(feature = "portable_simd", issue = "86656")]
mod core_simd;

#[doc = include_str!("../../portable-simd/crates/core_simd/src/core_simd_docs.md")]
#[unstable(feature = "portable_simd", issue = "86656")]
pub mod simd {
#![doc = include_str!("../../portable-simd/crates/core_simd/src/core_simd_docs.md")]

#[unstable(feature = "portable_simd", issue = "86656")]
pub use crate::core_simd::simd::*;
}
Expand Down
3 changes: 1 addition & 2 deletions library/core/src/slice/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3979,7 +3979,7 @@ impl<T> [T] {
///
/// ```
/// #![feature(portable_simd)]
/// use core::simd::SimdFloat;
/// use core::simd::prelude::*;
///
/// let short = &[1, 2, 3];
/// let (prefix, middle, suffix) = short.as_simd::<4>();
Expand All @@ -3991,7 +3991,6 @@ impl<T> [T] {
///
/// fn basic_simd_sum(x: &[f32]) -> f32 {
/// use std::ops::Add;
/// use std::simd::f32x4;
/// let (prefix, middle, suffix) = x.as_simd();
/// let sums = f32x4::from_array([
/// prefix.iter().copied().sum(),
Expand Down
20 changes: 10 additions & 10 deletions library/core/src/str/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1740,9 +1740,9 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
debug_assert!(needle.len() > 1);

use crate::ops::BitAnd;
use crate::simd::cmp::SimdPartialEq;
use crate::simd::mask8x16 as Mask;
use crate::simd::u8x16 as Block;
use crate::simd::{SimdPartialEq, ToBitMask};

let first_probe = needle[0];
let last_byte_offset = needle.len() - 1;
Expand All @@ -1765,7 +1765,7 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
};

// do a naive search if the haystack is too small to fit
if haystack.len() < Block::LANES + last_byte_offset {
if haystack.len() < Block::LEN + last_byte_offset {
return Some(haystack.windows(needle.len()).any(|c| c == needle));
}

Expand Down Expand Up @@ -1812,7 +1812,7 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
let eq_first: Mask = a.simd_eq(first_probe);
let eq_last: Mask = b.simd_eq(second_probe);
let both = eq_first.bitand(eq_last);
let mask = both.to_bitmask();
let mask = both.to_bitmask() as u16;

return mask;
};
Expand All @@ -1822,32 +1822,32 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
// The loop condition must ensure that there's enough headroom to read LANE bytes,
// and not only at the current index but also at the index shifted by block_offset
const UNROLL: usize = 4;
while i + last_byte_offset + UNROLL * Block::LANES < haystack.len() && !result {
while i + last_byte_offset + UNROLL * Block::LEN < haystack.len() && !result {
let mut masks = [0u16; UNROLL];
for j in 0..UNROLL {
masks[j] = test_chunk(i + j * Block::LANES);
masks[j] = test_chunk(i + j * Block::LEN);
}
for j in 0..UNROLL {
let mask = masks[j];
if mask != 0 {
result |= check_mask(i + j * Block::LANES, mask, result);
result |= check_mask(i + j * Block::LEN, mask, result);
}
}
i += UNROLL * Block::LANES;
i += UNROLL * Block::LEN;
}
while i + last_byte_offset + Block::LANES < haystack.len() && !result {
while i + last_byte_offset + Block::LEN < haystack.len() && !result {
let mask = test_chunk(i);
if mask != 0 {
result |= check_mask(i, mask, result);
}
i += Block::LANES;
i += Block::LEN;
}

// Process the tail that didn't fit into LANES-sized steps.
// This simply repeats the same procedure but as right-aligned chunk instead
// of a left-aligned one. The last byte must be exactly flush with the string end so
// we don't miss a single byte or read out of bounds.
let i = haystack.len() - last_byte_offset - Block::LANES;
let i = haystack.len() - last_byte_offset - Block::LEN;
let mask = test_chunk(i);
if mask != 0 {
result |= check_mask(i, mask, result);
Expand Down
3 changes: 1 addition & 2 deletions library/core/tests/simd.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use core::simd::f32x4;
use core::simd::SimdFloat;
use core::simd::prelude::*;

#[test]
fn testing() {
Expand Down
71 changes: 38 additions & 33 deletions library/portable-simd/.github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,40 +167,33 @@ jobs:
RUSTFLAGS: ${{ matrix.rustflags }}

cross-tests:
name: "${{ matrix.target }} (via cross)"
name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
runs-on: ubuntu-latest
strategy:
fail-fast: false
# TODO: Sadly, we can't configure target-feature in a meaningful way
# because `cross` doesn't tell qemu to enable any non-default cpu
# features, nor does it give us a way to do so.
#
# Ultimately, we'd like to do something like [rust-lang/stdarch][stdarch].
# This is a lot more complex... but in practice it's likely that we can just
# snarf the docker config from around [here][1000-dockerfiles].
#
# [stdarch]: https://github.com/rust-lang/stdarch/blob/a5db4eaf/.github/workflows/main.yml#L67
# [1000-dockerfiles]: https://github.com/rust-lang/stdarch/tree/a5db4eaf/ci/docker

matrix:
target:
- i586-unknown-linux-gnu
# 32-bit arm has a few idiosyncrasies like having subnormal flushing
# to zero on by default. Ideally we'd set
- armv7-unknown-linux-gnueabihf
- aarch64-unknown-linux-gnu
# Note: The issue above means neither of these mips targets will use
# MSA (mips simd) but MIPS uses a nonstandard binary representation
# for NaNs which makes it worth testing on despite that.
- thumbv7neon-unknown-linux-gnueabihf # includes neon by default
- aarch64-unknown-linux-gnu # includes neon by default
- powerpc-unknown-linux-gnu
- powerpc64le-unknown-linux-gnu # includes altivec by default
- riscv64gc-unknown-linux-gnu
# MIPS uses a nonstandard binary representation for NaNs, which makes it worth testing,
# but these targets have been non-nightly since https://github.com/rust-lang/rust/pull/113274
# - mips-unknown-linux-gnu
# - mips64-unknown-linux-gnuabi64
- riscv64gc-unknown-linux-gnu
# TODO this test works, but it appears to time out
# - powerpc-unknown-linux-gnu
# TODO this test is broken, but it appears to be a problem with QEMU, not us.
# - powerpc64le-unknown-linux-gnu
# TODO enable this once a new version of cross is released
# Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen.
# - powerpc64-unknown-linux-gnu
target_feature: [default]
include:
- { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
# Fails due to QEMU floating point errors, probably handling subnormals incorrectly.
# This target is somewhat redundant, since ppc64le has altivec as well.
# - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
# We should test this, but cross currently can't run it
# - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }

steps:
- uses: actions/checkout@v2
Expand All @@ -217,19 +210,35 @@ jobs:
# being part of the tarball means we can't just use the download/latest
# URL :(
run: |
CROSS_URL=https://github.com/rust-embedded/cross/releases/download/v0.2.1/cross-v0.2.1-x86_64-unknown-linux-gnu.tar.gz
CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz
mkdir -p "$HOME/.bin"
curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
echo "$HOME/.bin" >> $GITHUB_PATH
- name: Configure Emulated CPUs
run: |
echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV
# echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV
- name: Configure RUSTFLAGS
shell: bash
run: |
case "${{ matrix.target_feature }}" in
default)
echo "RUSTFLAGS=" >> $GITHUB_ENV;;
*)
echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
;;
esac
- name: Test (debug)
run: cross test --verbose --target=${{ matrix.target }}

- name: Test (release)
run: cross test --verbose --target=${{ matrix.target }} --release

features:
name: "Check cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
runs-on: ubuntu-latest
strategy:
fail-fast: false
Expand All @@ -240,12 +249,8 @@ jobs:
features:
- ""
- "--features std"
- "--features generic_const_exprs"
- "--features std --features generic_const_exprs"
- "--features all_lane_counts"
- "--features all_lane_counts --features std"
- "--features all_lane_counts --features generic_const_exprs"
- "--features all_lane_counts --features std --features generic_const_exprs"
- "--all-features"

steps:
- uses: actions/checkout@v2
Expand All @@ -257,9 +262,9 @@ jobs:
run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV
- name: Check build
if: ${{ matrix.simd == '' }}
run: RUSTFLAGS="-Dwarnings" cargo check --all-targets --no-default-features ${{ matrix.features }}
run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }}
- name: Check AVX
if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }}
run: |
echo "Found AVX features: $CPU_FEATURE"
RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo check --all-targets --no-default-features ${{ matrix.features }}
RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }}
1 change: 0 additions & 1 deletion library/portable-simd/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
/target
Cargo.lock

0 comments on commit 3f1e30a

Please sign in to comment.