diff --git a/Cargo.toml b/Cargo.toml index 0002cf5f9..4a65abf34 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ rust-version = "1.87" [workspace.dependencies] # workspace-internal numcodecs crates -numcodecs = { version = "0.3", path = "crates/numcodecs", default-features = false } +numcodecs = { version = "0.3.1", path = "crates/numcodecs", default-features = false } numcodecs-python = { version = "0.7.1", path = "crates/numcodecs-python", default-features = false } numcodecs-wasm-builder = { version = "0.2", path = "crates/numcodecs-wasm-builder", default-features = false } numcodecs-wasm-guest = { version = "0.3", path = "crates/numcodecs-wasm-guest", default-features = false } @@ -72,7 +72,7 @@ numcodecs-round = { version = "0.5", path = "codecs/round", default-features = f numcodecs-sperr = { version = "0.2", path = "codecs/sperr", default-features = false } numcodecs-stochastic-rounding = { version = "0.2", path = "codecs/stochastic-rounding", default-features = false } numcodecs-swizzle-reshape = { version = "0.4", path = "codecs/swizzle-reshape", default-features = false } -numcodecs-sz3 = { version = "0.7", path = "codecs/sz3", default-features = false } +numcodecs-sz3 = { version = "0.8", path = "codecs/sz3", default-features = false } numcodecs-tthresh = { version = "0.3", path = "codecs/tthresh", default-features = false } numcodecs-uniform-noise = { version = "0.4", path = "codecs/uniform-noise", default-features = false } numcodecs-zfp = { version = "0.6", path = "codecs/zfp", default-features = false } @@ -114,8 +114,8 @@ serde_json = { version = "1.0.140", default-features = false } serde_repr = { version = "0.1.3", default-features = false } simple_logger = { version = "5.0", default-features = false } sperr = { version = "0.2", default-features = false } -sz3 = { version = "0.3", default-features = false } -thiserror = { version = "2.0.12", default-features = false } +sz3 = { version = "0.4.2", default-features = false } +thiserror = { version = "2.0.17", default-features = false } tthresh = { version = "0.1", default-features = false } twofloat = { version = "0.8", default-features = false } vecmap-rs = { version = "0.2", default-features = false } @@ -134,7 +134,7 @@ wit-parser = { version = "0.240", default-features = false } wyhash = { version = "0.6", default-features = false } zfp-sys = { version = "0.4.2", default-features = false } zstd = { version = "0.13", default-features = false } -zstd-sys = { version = "2.0.12", default-features = false } +zstd-sys = { version = "2.0.16", default-features = false } # git third-party dependencies with non-upstream fixes wasm_component_layer = { git = "https://github.com/juntyr/wasm_component_layer.git", rev = "ad80189", version = "0.1", default-features = false } diff --git a/codecs/jpeg2000/src/lib.rs b/codecs/jpeg2000/src/lib.rs index 8c021e1c2..8f3211db7 100644 --- a/codecs/jpeg2000/src/lib.rs +++ b/codecs/jpeg2000/src/lib.rs @@ -390,7 +390,7 @@ pub fn decompress(encoded: &[u8]) -> Result { })?; // Return empty data for zero-size arrays - if header.shape.iter().copied().product::() == 0 { + if header.shape.iter().copied().any(|s| s == 0) { return match header.dtype { Jpeg2000DType::I8 => Ok(AnyArray::I8(Array::zeros(&*header.shape))), Jpeg2000DType::U8 => Ok(AnyArray::U8(Array::zeros(&*header.shape))), diff --git a/codecs/qpet-sperr/src/lib.rs b/codecs/qpet-sperr/src/lib.rs index 69f5fb22c..929b8a645 100644 --- a/codecs/qpet-sperr/src/lib.rs +++ b/codecs/qpet-sperr/src/lib.rs @@ -421,7 +421,7 @@ pub fn decompress(encoded: &[u8]) -> Result { })?; // Return empty data for zero-size arrays - if header.shape.iter().copied().product::() == 0 { + if header.shape.iter().copied().any(|s| s == 0) { return match header.dtype { QpetSperrDType::F32 => Ok(AnyArray::F32(Array::zeros(&*header.shape))), QpetSperrDType::F64 => Ok(AnyArray::F64(Array::zeros(&*header.shape))), diff --git a/codecs/qpet-sperr/tests/config.rs b/codecs/qpet-sperr/tests/config.rs index 0ec981932..ac5e7a876 100644 --- a/codecs/qpet-sperr/tests/config.rs +++ b/codecs/qpet-sperr/tests/config.rs @@ -5,9 +5,6 @@ use ::{ zstd_sys as _, }; -#[cfg(target_arch = "wasm32")] -use ::gmp_mpfr_sys as _; - use numcodecs::StaticCodec; use numcodecs_qpet_sperr::{QpetSperrCodec, QpetSperrCompressionMode}; use serde::Deserialize; diff --git a/codecs/qpet-sperr/tests/schema.rs b/codecs/qpet-sperr/tests/schema.rs index 885d3d99f..6b95744e1 100644 --- a/codecs/qpet-sperr/tests/schema.rs +++ b/codecs/qpet-sperr/tests/schema.rs @@ -5,9 +5,6 @@ use ::{ serde_json as _, thiserror as _, zstd_sys as _, }; -#[cfg(target_arch = "wasm32")] -use ::gmp_mpfr_sys as _; - use numcodecs::{DynCodecType, StaticCodecType}; use numcodecs_qpet_sperr::QpetSperrCodec; diff --git a/codecs/sperr/src/lib.rs b/codecs/sperr/src/lib.rs index 68a885de7..7b6435c8f 100644 --- a/codecs/sperr/src/lib.rs +++ b/codecs/sperr/src/lib.rs @@ -390,7 +390,7 @@ pub fn decompress(encoded: &[u8]) -> Result { })?; // Return empty data for zero-size arrays - if header.shape.iter().copied().product::() == 0 { + if header.shape.iter().copied().any(|s| s == 0) { return match header.dtype { SperrDType::F32 => Ok(AnyArray::F32(Array::zeros(&*header.shape))), SperrDType::F64 => Ok(AnyArray::F64(Array::zeros(&*header.shape))), diff --git a/codecs/sz3/Cargo.toml b/codecs/sz3/Cargo.toml index 9c25252f5..580e675fa 100644 --- a/codecs/sz3/Cargo.toml +++ b/codecs/sz3/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numcodecs-sz3" -version = "0.7.0" +version = "0.8.0" edition = { workspace = true } authors = { workspace = true } repository = { workspace = true } @@ -26,6 +26,7 @@ thiserror = { workspace = true } zstd-sys = { workspace = true, features = ["no_wasm_shim"] } [dev-dependencies] +num-traits = { workspace = true, features = ["std"] } serde_json = { workspace = true, features = ["std"] } [lints] diff --git a/codecs/sz3/src/lib.rs b/codecs/sz3/src/lib.rs index f95de9fc4..0c44bc041 100644 --- a/codecs/sz3/src/lib.rs +++ b/codecs/sz3/src/lib.rs @@ -21,10 +21,10 @@ use std::{borrow::Cow, fmt}; -use ndarray::{Array, Array1, ArrayBase, Data, Dimension, ShapeError}; +use ndarray::{Array, Array1, ArrayBase, ArrayViewMut, Data, Dimension, IxDyn, ShapeError}; use numcodecs::{ AnyArray, AnyArrayAssignError, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, - Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion, + ArrayDType, ArrayDataMutExt, Codec, StaticCodec, StaticCodecConfig, StaticCodecVersion, }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -37,7 +37,7 @@ use ::zstd_sys as _; #[cfg(test)] use ::serde_json as _; -type Sz3CodecVersion = StaticCodecVersion<0, 1, 0>; +type Sz3CodecVersion = StaticCodecVersion<0, 2, 0>; #[derive(Clone, Serialize, Deserialize, JsonSchema)] // serde cannot deny unknown fields because of the flatten @@ -116,68 +116,38 @@ pub enum Sz3ErrorBound { #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] #[serde(deny_unknown_fields)] pub enum Sz3Predictor { - /// Linear interpolation - #[serde(rename = "linear-interpolation")] - LinearInterpolation, - /// Cubic interpolation - #[serde(rename = "cubic-interpolation")] - CubicInterpolation, - /// Linear interpolation + Lorenzo predictor - #[serde(rename = "linear-interpolation-lorenzo")] - LinearInterpolationLorenzo, - /// Cubic interpolation + Lorenzo predictor - #[serde(rename = "cubic-interpolation-lorenzo")] - CubicInterpolationLorenzo, + /// Interpolation + #[serde(rename = "interpolation")] + Interpolation, + /// Interpolation + Lorenzo predictor + #[serde(rename = "interpolation-lorenzo")] + InterpolationLorenzo, /// 1st order regression #[serde(rename = "regression")] Regression, - /// 2nd order regression - #[serde(rename = "regression2")] - RegressionSecondOrder, - /// 1st+2nd order regression - #[serde(rename = "regression-regression2")] - RegressionFirstSecondOrder, /// 2nd order Lorenzo predictor #[serde(rename = "lorenzo2")] LorenzoSecondOrder, - /// 2nd order Lorenzo predictor + 2nd order regression - #[serde(rename = "lorenzo2-regression2")] - LorenzoSecondOrderRegressionSecondOrder, /// 2nd order Lorenzo predictor + 1st order regression #[serde(rename = "lorenzo2-regression")] LorenzoSecondOrderRegression, - /// 2nd order Lorenzo predictor + 1st order regression - #[serde(rename = "lorenzo2-regression-regression2")] - LorenzoSecondOrderRegressionFirstSecondOrder, /// 1st order Lorenzo predictor #[serde(rename = "lorenzo")] Lorenzo, - /// 1st order Lorenzo predictor + 2nd order regression - #[serde(rename = "lorenzo-regression2")] - LorenzoRegressionSecondOrder, /// 1st order Lorenzo predictor + 1st order regression #[serde(rename = "lorenzo-regression")] LorenzoRegression, - /// 1st order Lorenzo predictor + 1st and 2nd order regression - #[serde(rename = "lorenzo-regression-regression2")] - LorenzoRegressionFirstSecondOrder, /// 1st+2nd order Lorenzo predictor #[serde(rename = "lorenzo-lorenzo2")] LorenzoFirstSecondOrder, - /// 1st+2nd order Lorenzo predictor + 2nd order regression - #[serde(rename = "lorenzo-lorenzo2-regression2")] - LorenzoFirstSecondOrderRegressionSecondOrder, /// 1st+2nd order Lorenzo predictor + 1st order regression #[serde(rename = "lorenzo-lorenzo2-regression")] LorenzoFirstSecondOrderRegression, - /// 1st+2nd order Lorenzo predictor + 1st+2nd order regression - #[serde(rename = "lorenzo-lorenzo2-regression-regression2")] - LorenzoFirstSecondOrderRegressionFirstSecondOrder, } #[expect(clippy::unnecessary_wraps)] const fn default_predictor() -> Option { - Some(Sz3Predictor::CubicInterpolationLorenzo) + Some(Sz3Predictor::InterpolationLorenzo) } impl Codec for Sz3Codec { @@ -224,11 +194,21 @@ impl Codec for Sz3Codec { fn decode_into( &self, encoded: AnyArrayView, - mut decoded: AnyArrayViewMut, + decoded: AnyArrayViewMut, ) -> Result<(), Self::Error> { - let decoded_in = self.decode(encoded.cow())?; + let AnyArrayView::U8(encoded) = encoded else { + return Err(Sz3CodecError::EncodedDataNotBytes { + dtype: encoded.dtype(), + }); + }; + + if !matches!(encoded.shape(), [_]) { + return Err(Sz3CodecError::EncodedDataNotOneDimensional { + shape: encoded.shape().to_vec(), + }); + } - Ok(decoded.assign(&decoded_in)?) + decompress_into(&AnyArrayView::U8(encoded).as_bytes(), decoded) } } @@ -296,6 +276,12 @@ pub enum Sz3CodecError { /// Opaque source error source: Sz3HeaderError, }, + /// [`Sz3Codec`] failed to decode the data + #[error("Sz3 failed to decode the data")] + Sz3DecodeFailed { + /// Opaque source error + source: Sz3CodingError, + }, /// [`Sz3Codec`] decoded an invalid array shape header which does not fit /// the decoded data #[error("Sz3 decoded an invalid array shape header which does not fit the decoded data")] @@ -412,150 +398,42 @@ pub fn compress, D: Dimension>( }; let mut config = sz3::Config::new(error_bound); - // configure the interpolation mode, if necessary - let interpolation = match predictor { - Some(Sz3Predictor::LinearInterpolation | Sz3Predictor::LinearInterpolationLorenzo) => { - Some(sz3::InterpolationAlgorithm::Linear) - } - Some(Sz3Predictor::CubicInterpolation | Sz3Predictor::CubicInterpolationLorenzo) => { - Some(sz3::InterpolationAlgorithm::Cubic) - } - Some( - Sz3Predictor::Regression - | Sz3Predictor::RegressionSecondOrder - | Sz3Predictor::RegressionFirstSecondOrder - | Sz3Predictor::LorenzoSecondOrder - | Sz3Predictor::LorenzoSecondOrderRegressionSecondOrder - | Sz3Predictor::LorenzoSecondOrderRegression - | Sz3Predictor::LorenzoSecondOrderRegressionFirstSecondOrder - | Sz3Predictor::Lorenzo - | Sz3Predictor::LorenzoRegressionSecondOrder - | Sz3Predictor::LorenzoRegression - | Sz3Predictor::LorenzoRegressionFirstSecondOrder - | Sz3Predictor::LorenzoFirstSecondOrder - | Sz3Predictor::LorenzoFirstSecondOrderRegressionSecondOrder - | Sz3Predictor::LorenzoFirstSecondOrderRegression - | Sz3Predictor::LorenzoFirstSecondOrderRegressionFirstSecondOrder, - ) - | None => None, - }; - if let Some(interpolation) = interpolation { - config = config.interpolation_algorithm(interpolation); - } - // configure the predictor (compression algorithm) let predictor = match predictor { - Some(Sz3Predictor::LinearInterpolation | Sz3Predictor::CubicInterpolation) => { - sz3::CompressionAlgorithm::Interpolation - } - Some( - Sz3Predictor::LinearInterpolationLorenzo | Sz3Predictor::CubicInterpolationLorenzo, - ) => sz3::CompressionAlgorithm::InterpolationLorenzo, - Some(Sz3Predictor::RegressionSecondOrder) => sz3::CompressionAlgorithm::LorenzoRegression { - lorenzo: false, - lorenzo_second_order: false, - regression: false, - regression_second_order: true, - prediction_dimension: None, - }, + Some(Sz3Predictor::Interpolation) => sz3::CompressionAlgorithm::Interpolation, + Some(Sz3Predictor::InterpolationLorenzo) => sz3::CompressionAlgorithm::InterpolationLorenzo, Some(Sz3Predictor::Regression) => sz3::CompressionAlgorithm::LorenzoRegression { lorenzo: false, lorenzo_second_order: false, regression: true, - regression_second_order: false, - prediction_dimension: None, }, - Some(Sz3Predictor::RegressionFirstSecondOrder) => { - sz3::CompressionAlgorithm::LorenzoRegression { - lorenzo: false, - lorenzo_second_order: false, - regression: true, - regression_second_order: true, - prediction_dimension: None, - } - } Some(Sz3Predictor::LorenzoSecondOrder) => sz3::CompressionAlgorithm::LorenzoRegression { lorenzo: false, lorenzo_second_order: true, regression: false, - regression_second_order: false, - prediction_dimension: None, }, - Some(Sz3Predictor::LorenzoSecondOrderRegressionSecondOrder) => { - sz3::CompressionAlgorithm::LorenzoRegression { - lorenzo: false, - lorenzo_second_order: true, - regression: false, - regression_second_order: true, - prediction_dimension: None, - } - } Some(Sz3Predictor::LorenzoSecondOrderRegression) => { sz3::CompressionAlgorithm::LorenzoRegression { lorenzo: false, lorenzo_second_order: true, regression: true, - regression_second_order: false, - prediction_dimension: None, - } - } - Some(Sz3Predictor::LorenzoSecondOrderRegressionFirstSecondOrder) => { - sz3::CompressionAlgorithm::LorenzoRegression { - lorenzo: false, - lorenzo_second_order: true, - regression: true, - regression_second_order: true, - prediction_dimension: None, } } Some(Sz3Predictor::Lorenzo) => sz3::CompressionAlgorithm::LorenzoRegression { lorenzo: true, lorenzo_second_order: false, regression: false, - regression_second_order: false, - prediction_dimension: None, }, - Some(Sz3Predictor::LorenzoRegressionSecondOrder) => { - sz3::CompressionAlgorithm::LorenzoRegression { - lorenzo: true, - lorenzo_second_order: false, - regression: false, - regression_second_order: true, - prediction_dimension: None, - } - } Some(Sz3Predictor::LorenzoRegression) => sz3::CompressionAlgorithm::LorenzoRegression { lorenzo: true, lorenzo_second_order: false, regression: true, - regression_second_order: false, - prediction_dimension: None, }, - Some(Sz3Predictor::LorenzoRegressionFirstSecondOrder) => { - sz3::CompressionAlgorithm::LorenzoRegression { - lorenzo: true, - lorenzo_second_order: false, - regression: true, - regression_second_order: true, - prediction_dimension: None, - } - } Some(Sz3Predictor::LorenzoFirstSecondOrder) => { sz3::CompressionAlgorithm::LorenzoRegression { lorenzo: true, lorenzo_second_order: true, regression: false, - regression_second_order: false, - prediction_dimension: None, - } - } - Some(Sz3Predictor::LorenzoFirstSecondOrderRegressionSecondOrder) => { - sz3::CompressionAlgorithm::LorenzoRegression { - lorenzo: true, - lorenzo_second_order: true, - regression: false, - regression_second_order: true, - prediction_dimension: None, } } Some(Sz3Predictor::LorenzoFirstSecondOrderRegression) => { @@ -563,41 +441,45 @@ pub fn compress, D: Dimension>( lorenzo: true, lorenzo_second_order: true, regression: true, - regression_second_order: false, - prediction_dimension: None, - } - } - Some(Sz3Predictor::LorenzoFirstSecondOrderRegressionFirstSecondOrder) => { - sz3::CompressionAlgorithm::LorenzoRegression { - lorenzo: true, - lorenzo_second_order: true, - regression: true, - regression_second_order: true, - prediction_dimension: None, } } None => sz3::CompressionAlgorithm::NoPrediction, }; config = config.compression_algorithm(predictor); - // TODO: avoid extra allocation here - let compressed = sz3::compress_with_config(&data, &config).map_err(|err| { + sz3::compress_into_with_config(&data, &config, &mut encoded_bytes).map_err(|err| { Sz3CodecError::Sz3EncodeFailed { source: Sz3CodingError(err), } })?; - encoded_bytes.extend_from_slice(&compressed); Ok(encoded_bytes) } -/// Decompresses the `encoded` data into an array. +/// Decompresses the `encoded` data into an array using SZ3. /// /// # Errors /// /// Errors with /// - [`Sz3CodecError::HeaderDecodeFailed`] if decoding the header failed +/// - [`Sz3CodecError::Sz3DecodeFailed`] if decoding failed with an opaque error pub fn decompress(encoded: &[u8]) -> Result { + fn decompress_typed( + encoded: &[u8], + shape: &[usize], + ) -> Result, Sz3CodecError> { + if shape.iter().copied().any(|s| s == 0) { + return Ok(Array::from_shape_vec(shape, Vec::new())?); + } + + let (_config, decompressed) = + sz3::decompress(encoded).map_err(|err| Sz3CodecError::Sz3DecodeFailed { + source: Sz3CodingError(err), + })?; + + Ok(Array::from_shape_vec(shape, decompressed.into_data())?) + } + let (header, data) = postcard::take_from_bytes::(encoded).map_err(|err| { Sz3CodecError::HeaderDecodeFailed { @@ -605,56 +487,162 @@ pub fn decompress(encoded: &[u8]) -> Result { } })?; - let decoded = if header.shape.iter().copied().product::() == 0 { - match header.dtype { - Sz3DType::I32 => { - AnyArray::I32(Array::from_shape_vec(&*header.shape, Vec::new())?.into_dyn()) - } - Sz3DType::I64 => { - AnyArray::I64(Array::from_shape_vec(&*header.shape, Vec::new())?.into_dyn()) + let decoded = match header.dtype { + Sz3DType::U8 => AnyArray::U8(decompress_typed(data, &header.shape)?), + Sz3DType::I8 => AnyArray::I8(decompress_typed(data, &header.shape)?), + Sz3DType::U16 => AnyArray::U16(decompress_typed(data, &header.shape)?), + Sz3DType::I16 => AnyArray::I16(decompress_typed(data, &header.shape)?), + Sz3DType::U32 => AnyArray::U32(decompress_typed(data, &header.shape)?), + Sz3DType::I32 => AnyArray::I32(decompress_typed(data, &header.shape)?), + Sz3DType::U64 => AnyArray::U64(decompress_typed(data, &header.shape)?), + Sz3DType::I64 => AnyArray::I64(decompress_typed(data, &header.shape)?), + Sz3DType::F32 => AnyArray::F32(decompress_typed(data, &header.shape)?), + Sz3DType::F64 => AnyArray::F64(decompress_typed(data, &header.shape)?), + }; + + Ok(decoded) +} + +/// Decompresses the `encoded` data into a `decoded` array using SZ3. +/// +/// # Errors +/// +/// Errors with +/// - [`Sz3CodecError::HeaderDecodeFailed`] if decoding the header failed +/// - [`Sz3CodecError::MismatchedDecodeIntoArray`] if the `decoded` array is of +/// the wrong dtype or shape +/// - [`Sz3CodecError::Sz3DecodeFailed`] if decoding failed with an opaque error +pub fn decompress_into(encoded: &[u8], decoded: AnyArrayViewMut) -> Result<(), Sz3CodecError> { + fn decompress_into_typed( + encoded: &[u8], + mut decoded: ArrayViewMut, + ) -> Result<(), Sz3CodecError> { + if decoded.is_empty() { + return Ok(()); + } + + let decoded_shape = decoded.shape().to_vec(); + + decoded.with_slice_mut(|mut decoded| { + let decoded_len = decoded.len(); + + let mut builder = sz3::DimensionedData::build_mut(&mut decoded); + + for length in &decoded_shape { + // Sz3 ignores dimensions of length 1 and panics on length zero + // Since they carry no information for Sz3 and we already encode them + // in our custom header, we just skip them here + if *length > 1 { + builder = builder + .dim(*length) + // FIXME: different error code + .map_err(|err| Sz3CodecError::InvalidEncodeShape { + source: Sz3CodingError(err), + shape: decoded_shape.clone(), + })?; + } } - Sz3DType::F32 => { - AnyArray::F32(Array::from_shape_vec(&*header.shape, Vec::new())?.into_dyn()) + + if decoded_len == 1 { + // If there is only one element, all dimensions will have been skipped, + // so we explicitly encode one dimension of size 1 here + builder = builder + .dim(1) + // FIXME: different error code + .map_err(|err| Sz3CodecError::InvalidEncodeShape { + source: Sz3CodingError(err), + shape: decoded_shape.clone(), + })?; } - Sz3DType::F64 => { - AnyArray::F64(Array::from_shape_vec(&*header.shape, Vec::new())?.into_dyn()) + + let mut decoded = builder + .finish() + // FIXME: different error code + .map_err(|err| Sz3CodecError::InvalidEncodeShape { + source: Sz3CodingError(err), + shape: decoded_shape, + })?; + + sz3::decompress_into_dimensioned(encoded, &mut decoded).map_err(|err| { + Sz3CodecError::Sz3DecodeFailed { + source: Sz3CodingError(err), + } + }) + })?; + + Ok(()) + } + + let (header, data) = + postcard::take_from_bytes::(encoded).map_err(|err| { + Sz3CodecError::HeaderDecodeFailed { + source: Sz3HeaderError(err), } - } - } else { - // TODO: avoid extra allocation here - match header.dtype { - Sz3DType::I32 => AnyArray::I32(Array::from_shape_vec( - &*header.shape, - Vec::from(sz3::decompress(data).1.data()), - )?), - Sz3DType::I64 => AnyArray::I64(Array::from_shape_vec( - &*header.shape, - Vec::from(sz3::decompress(data).1.data()), - )?), - Sz3DType::F32 => AnyArray::F32(Array::from_shape_vec( - &*header.shape, - Vec::from(sz3::decompress(data).1.data()), - )?), - Sz3DType::F64 => AnyArray::F64(Array::from_shape_vec( - &*header.shape, - Vec::from(sz3::decompress(data).1.data()), - )?), - } - }; + })?; - Ok(decoded) + if decoded.shape() != &*header.shape { + return Err(Sz3CodecError::MismatchedDecodeIntoArray { + source: AnyArrayAssignError::ShapeMismatch { + src: header.shape.into_owned(), + dst: decoded.shape().to_vec(), + }, + }); + } + + match (decoded, header.dtype) { + (AnyArrayViewMut::U8(decoded), Sz3DType::U8) => decompress_into_typed(data, decoded), + (AnyArrayViewMut::I8(decoded), Sz3DType::I8) => decompress_into_typed(data, decoded), + (AnyArrayViewMut::U16(decoded), Sz3DType::U16) => decompress_into_typed(data, decoded), + (AnyArrayViewMut::I16(decoded), Sz3DType::I16) => decompress_into_typed(data, decoded), + (AnyArrayViewMut::U32(decoded), Sz3DType::U32) => decompress_into_typed(data, decoded), + (AnyArrayViewMut::I32(decoded), Sz3DType::I32) => decompress_into_typed(data, decoded), + (AnyArrayViewMut::U64(decoded), Sz3DType::U64) => decompress_into_typed(data, decoded), + (AnyArrayViewMut::I64(decoded), Sz3DType::I64) => decompress_into_typed(data, decoded), + (AnyArrayViewMut::F32(decoded), Sz3DType::F32) => decompress_into_typed(data, decoded), + (AnyArrayViewMut::F64(decoded), Sz3DType::F64) => decompress_into_typed(data, decoded), + (decoded, dtype) => Err(Sz3CodecError::MismatchedDecodeIntoArray { + source: AnyArrayAssignError::DTypeMismatch { + src: dtype.into_dtype(), + dst: decoded.dtype(), + }, + }), + } } /// Array element types which can be compressed with SZ3. -pub trait Sz3Element: Copy + sz3::SZ3Compressible { +pub trait Sz3Element: Copy + sz3::SZ3Compressible + ArrayDType { /// The dtype representation of the type const DTYPE: Sz3DType; } +impl Sz3Element for u8 { + const DTYPE: Sz3DType = Sz3DType::U8; +} + +impl Sz3Element for i8 { + const DTYPE: Sz3DType = Sz3DType::I8; +} + +impl Sz3Element for u16 { + const DTYPE: Sz3DType = Sz3DType::U16; +} + +impl Sz3Element for i16 { + const DTYPE: Sz3DType = Sz3DType::I16; +} + +impl Sz3Element for u32 { + const DTYPE: Sz3DType = Sz3DType::U32; +} + impl Sz3Element for i32 { const DTYPE: Sz3DType = Sz3DType::I32; } +impl Sz3Element for u64 { + const DTYPE: Sz3DType = Sz3DType::U64; +} + impl Sz3Element for i64 { const DTYPE: Sz3DType = Sz3DType::I64; } @@ -679,6 +667,18 @@ struct CompressionHeader<'a> { #[derive(Copy, Clone, Debug, Serialize, Deserialize)] #[expect(missing_docs)] pub enum Sz3DType { + #[serde(rename = "u8", alias = "uint8")] + U8, + #[serde(rename = "u16", alias = "uint16")] + U16, + #[serde(rename = "u32", alias = "uint32")] + U32, + #[serde(rename = "u64", alias = "uint64")] + U64, + #[serde(rename = "i8", alias = "int8")] + I8, + #[serde(rename = "i16", alias = "int16")] + I16, #[serde(rename = "i32", alias = "int32")] I32, #[serde(rename = "i64", alias = "int64")] @@ -689,9 +689,34 @@ pub enum Sz3DType { F64, } +impl Sz3DType { + /// Get the corresponding [`AnyArrayDType`] + #[must_use] + pub const fn into_dtype(self) -> AnyArrayDType { + match self { + Self::U8 => AnyArrayDType::U8, + Self::U16 => AnyArrayDType::U16, + Self::U32 => AnyArrayDType::U32, + Self::U64 => AnyArrayDType::U64, + Self::I8 => AnyArrayDType::I8, + Self::I16 => AnyArrayDType::I16, + Self::I32 => AnyArrayDType::I32, + Self::I64 => AnyArrayDType::I64, + Self::F32 => AnyArrayDType::F32, + Self::F64 => AnyArrayDType::F64, + } + } +} + impl fmt::Display for Sz3DType { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.write_str(match self { + Self::U8 => "u8", + Self::U16 => "u16", + Self::U32 => "u32", + Self::U64 => "u64", + Self::I8 => "i8", + Self::I16 => "i16", Self::I32 => "i32", Self::I64 => "i64", Self::F32 => "f32", @@ -733,7 +758,12 @@ mod tests { )?; let decoded = decompress(&encoded)?; - assert_eq!(decoded, AnyArray::I32(data)); + assert_eq!(decoded, AnyArray::I32(data.clone())); + + let mut decoded = Array::zeros(data.dim()); + decompress_into(&encoded, AnyArrayViewMut::I32(decoded.view_mut()))?; + + assert_eq!(decoded, data); Ok(()) } @@ -758,6 +788,14 @@ mod tests { decoded, AnyArray::F64(Array1::from_vec(data.to_vec()).into_dyn()) ); + + let mut decoded = Array::zeros([data.len()]); + decompress_into( + &encoded, + AnyArrayViewMut::F64(decoded.view_mut().into_dyn()), + )?; + + assert_eq!(decoded, Array1::from_vec(data.to_vec())); } Ok(()) @@ -769,21 +807,15 @@ mod tests { for predictor in [ None, + Some(Sz3Predictor::Interpolation), + Some(Sz3Predictor::InterpolationLorenzo), Some(Sz3Predictor::Regression), - Some(Sz3Predictor::RegressionSecondOrder), - Some(Sz3Predictor::RegressionFirstSecondOrder), Some(Sz3Predictor::LorenzoSecondOrder), - Some(Sz3Predictor::LorenzoSecondOrderRegressionSecondOrder), Some(Sz3Predictor::LorenzoSecondOrderRegression), - Some(Sz3Predictor::LorenzoSecondOrderRegressionFirstSecondOrder), Some(Sz3Predictor::Lorenzo), - Some(Sz3Predictor::LorenzoRegressionSecondOrder), Some(Sz3Predictor::LorenzoRegression), - Some(Sz3Predictor::LorenzoRegressionFirstSecondOrder), Some(Sz3Predictor::LorenzoFirstSecondOrder), - Some(Sz3Predictor::LorenzoFirstSecondOrderRegressionSecondOrder), Some(Sz3Predictor::LorenzoFirstSecondOrderRegression), - Some(Sz3Predictor::LorenzoFirstSecondOrderRegressionFirstSecondOrder), ] { let encoded = compress( data.view(), @@ -791,8 +823,47 @@ mod tests { &Sz3ErrorBound::Absolute { abs: 0.1 }, )?; let _decoded = decompress(&encoded)?; + + let mut decoded = Array::zeros(data.dim()); + decompress_into( + &encoded, + AnyArrayViewMut::F64(decoded.view_mut().into_dyn()), + )?; } Ok(()) } + + #[test] + fn all_dtypes() -> Result<(), Sz3CodecError> { + fn compress_decompress( + iter: impl Clone + IntoIterator, + view_mut: impl for<'a> Fn(ArrayViewMut<'a, T, IxDyn>) -> AnyArrayViewMut<'a>, + ) -> Result<(), Sz3CodecError> { + let encoded = compress( + Array::from_iter(iter.clone()).view(), + None, + &Sz3ErrorBound::Absolute { abs: 2.0 }, + )?; + let _decoded = decompress(&encoded)?; + + let mut decoded = Array::::zeros([iter.into_iter().count()]).into_dyn(); + decompress_into(&encoded, view_mut(decoded.view_mut().into_dyn()))?; + + Ok(()) + } + + compress_decompress(0_u8..42, |x| AnyArrayViewMut::U8(x))?; + compress_decompress(-42_i8..42, |x| AnyArrayViewMut::I8(x))?; + compress_decompress(0_u16..42, |x| AnyArrayViewMut::U16(x))?; + compress_decompress(-42_i16..42, |x| AnyArrayViewMut::I16(x))?; + compress_decompress(0_u32..42, |x| AnyArrayViewMut::U32(x))?; + compress_decompress(-42_i32..42, |x| AnyArrayViewMut::I32(x))?; + compress_decompress(0_u64..42, |x| AnyArrayViewMut::U64(x))?; + compress_decompress(-42_i64..42, |x| AnyArrayViewMut::I64(x))?; + compress_decompress((-42_i16..42).map(f32::from), |x| AnyArrayViewMut::F32(x))?; + compress_decompress((-42_i16..42).map(f64::from), |x| AnyArrayViewMut::F64(x))?; + + Ok(()) + } } diff --git a/codecs/sz3/tests/config.rs b/codecs/sz3/tests/config.rs index 8d22c628f..aa5bda0f2 100644 --- a/codecs/sz3/tests/config.rs +++ b/codecs/sz3/tests/config.rs @@ -1,6 +1,9 @@ #![expect(missing_docs)] -use ::{ndarray as _, postcard as _, schemars as _, sz3 as _, thiserror as _, zstd_sys as _}; +use ::{ + ndarray as _, num_traits as _, postcard as _, schemars as _, sz3 as _, thiserror as _, + zstd_sys as _, +}; use numcodecs::StaticCodec; use numcodecs_sz3::Sz3Codec; @@ -59,7 +62,7 @@ fn config_predictor() { Deserialize::deserialize(json!({ "eb_mode": "rel", "eb_rel": 1.0, - "predictor": "linear-interpolation", + "predictor": "interpolation", })) .unwrap(), ); @@ -68,7 +71,7 @@ fn config_predictor() { Deserialize::deserialize(json!({ "eb_mode": "rel", "eb_rel": 1.0, - "predictor": "cubic-interpolation-lorenzo", + "predictor": "interpolation-lorenzo", })) .unwrap(), ); diff --git a/codecs/sz3/tests/schema.json b/codecs/sz3/tests/schema.json index c32d21c62..0c5fb52db 100644 --- a/codecs/sz3/tests/schema.json +++ b/codecs/sz3/tests/schema.json @@ -7,98 +7,48 @@ "oneOf": [ { "type": "string", - "const": "linear-interpolation", - "description": "Linear interpolation" + "const": "interpolation", + "description": "Interpolation" }, { "type": "string", - "const": "cubic-interpolation", - "description": "Cubic interpolation" - }, - { - "type": "string", - "const": "linear-interpolation-lorenzo", - "description": "Linear interpolation + Lorenzo predictor" - }, - { - "type": "string", - "const": "cubic-interpolation-lorenzo", - "description": "Cubic interpolation + Lorenzo predictor" + "const": "interpolation-lorenzo", + "description": "Interpolation + Lorenzo predictor" }, { "type": "string", "const": "regression", "description": "1st order regression" }, - { - "type": "string", - "const": "regression2", - "description": "2nd order regression" - }, - { - "type": "string", - "const": "regression-regression2", - "description": "1st+2nd order regression" - }, { "type": "string", "const": "lorenzo2", "description": "2nd order Lorenzo predictor" }, - { - "type": "string", - "const": "lorenzo2-regression2", - "description": "2nd order Lorenzo predictor + 2nd order regression" - }, { "type": "string", "const": "lorenzo2-regression", "description": "2nd order Lorenzo predictor + 1st order regression" }, - { - "type": "string", - "const": "lorenzo2-regression-regression2", - "description": "2nd order Lorenzo predictor + 1st order regression" - }, { "type": "string", "const": "lorenzo", "description": "1st order Lorenzo predictor" }, - { - "type": "string", - "const": "lorenzo-regression2", - "description": "1st order Lorenzo predictor + 2nd order regression" - }, { "type": "string", "const": "lorenzo-regression", "description": "1st order Lorenzo predictor + 1st order regression" }, - { - "type": "string", - "const": "lorenzo-regression-regression2", - "description": "1st order Lorenzo predictor + 1st and 2nd order regression" - }, { "type": "string", "const": "lorenzo-lorenzo2", "description": "1st+2nd order Lorenzo predictor" }, - { - "type": "string", - "const": "lorenzo-lorenzo2-regression2", - "description": "1st+2nd order Lorenzo predictor + 2nd order regression" - }, { "type": "string", "const": "lorenzo-lorenzo2-regression", "description": "1st+2nd order Lorenzo predictor + 1st order regression" - }, - { - "type": "string", - "const": "lorenzo-lorenzo2-regression-regression2", - "description": "1st+2nd order Lorenzo predictor + 1st+2nd order regression" } ], "description": "SZ3 predictor" @@ -108,13 +58,13 @@ } ], "description": "Predictor", - "default": "cubic-interpolation-lorenzo" + "default": "interpolation-lorenzo" }, "_version": { "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$", "description": "The codec's encoding format version. Do not provide this parameter explicitly.", - "default": "0.1.0" + "default": "0.2.0" } }, "unevaluatedProperties": false, diff --git a/codecs/sz3/tests/schema.rs b/codecs/sz3/tests/schema.rs index cb3213aba..9e40080e0 100644 --- a/codecs/sz3/tests/schema.rs +++ b/codecs/sz3/tests/schema.rs @@ -1,8 +1,8 @@ #![expect(missing_docs)] use ::{ - ndarray as _, postcard as _, schemars as _, serde as _, serde_json as _, sz3 as _, - thiserror as _, zstd_sys as _, + ndarray as _, num_traits as _, postcard as _, schemars as _, serde as _, serde_json as _, + sz3 as _, thiserror as _, zstd_sys as _, }; use numcodecs::{DynCodecType, StaticCodecType}; diff --git a/codecs/zfp-classic/src/lib.rs b/codecs/zfp-classic/src/lib.rs index a73c9d892..f008a6cc9 100644 --- a/codecs/zfp-classic/src/lib.rs +++ b/codecs/zfp-classic/src/lib.rs @@ -378,7 +378,7 @@ pub fn decompress(encoded: &[u8]) -> Result { })?; // Return empty data for zero-size arrays - if header.shape.iter().copied().product::() == 0 { + if header.shape.iter().copied().any(|s| s == 0) { let decoded = match header.dtype { ZfpDType::I32 => AnyArray::I32(Array::zeros(&*header.shape)), ZfpDType::I64 => AnyArray::I64(Array::zeros(&*header.shape)), diff --git a/codecs/zfp/src/lib.rs b/codecs/zfp/src/lib.rs index 8e91a1796..3c16e4f87 100644 --- a/codecs/zfp/src/lib.rs +++ b/codecs/zfp/src/lib.rs @@ -378,7 +378,7 @@ pub fn decompress(encoded: &[u8]) -> Result { })?; // Return empty data for zero-size arrays - if header.shape.iter().copied().product::() == 0 { + if header.shape.iter().copied().any(|s| s == 0) { let decoded = match header.dtype { ZfpDType::I32 => AnyArray::I32(Array::zeros(&*header.shape)), ZfpDType::I64 => AnyArray::I64(Array::zeros(&*header.shape)), diff --git a/crates/numcodecs/Cargo.toml b/crates/numcodecs/Cargo.toml index c7993bfb9..daf48e7b5 100644 --- a/crates/numcodecs/Cargo.toml +++ b/crates/numcodecs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "numcodecs" -version = "0.3.0" +version = "0.3.1" edition = { workspace = true } authors = { workspace = true } repository = { workspace = true } diff --git a/crates/numcodecs/src/array.rs b/crates/numcodecs/src/array.rs index 6de4ffb6e..095b1a334 100644 --- a/crates/numcodecs/src/array.rs +++ b/crates/numcodecs/src/array.rs @@ -320,38 +320,22 @@ where /// Otherwise, the data is cloned and put into standard order first, and /// later copied back into the array. pub fn with_bytes_mut(&mut self, with: impl FnOnce(&mut [u8]) -> O) -> O { - fn array_with_bytes_mut, O>( + fn array_with_bytes_mut, O>( x: &mut ArrayBase, with: impl FnOnce(&mut [u8]) -> O, ) -> O { - if let Some(x) = x.as_slice_mut() { - #[expect(unsafe_code)] - // Safety: casting to a byte slice is only safe since this - // private helper function is only called for plain- - // old-data types and the slice's length is adjusted + #[expect(unsafe_code)] + // Safety: casting to a byte slice is only safe since this + // private helper function is only called for plain- + // old-data types and the slice's length is adjusted + x.with_slice_mut(|x| { with(unsafe { std::slice::from_raw_parts_mut( x.as_mut_ptr().cast::(), std::mem::size_of_val(x), ) }) - } else { - let mut x_vec: Vec = x.into_iter().map(|x| *x).collect::>(); - - #[expect(unsafe_code)] - // Safety: casting to a byte slice is only safe since this - // private helper function is only called for plain- - // old-data types and the slice's length is adjusted - let result = with(unsafe { - std::slice::from_raw_parts_mut( - x_vec.as_mut_ptr().cast::(), - std::mem::size_of_val(x_vec.as_slice()), - ) - }); - - x.iter_mut().zip(x_vec).for_each(|(x, x_new)| *x = x_new); - result - } + }) } match self { @@ -601,6 +585,44 @@ where } } +/// Extension trait for [`ArrayBase`] where the data provides mutable access +/// and implements [`DataMut`]. +/// +/// This trait is sealed and cannot be implemented in your code, but is +/// provided for all arrays over element types implementing [`ArrayDType`]. +pub trait ArrayDataMutExt: sealed::SealedArrayDataMutExt { + #[must_use] + /// Provides access to the array's data as a mutable slice. + /// + /// If the array is contiguous and in standard order, i.e. if the element + /// order in memory corresponds to the logical order of the array's + /// elements, a mutable view of the data is returned without cloning. + /// + /// Otherwise, the data is cloned and put into standard order first, and + /// later copied back into the array. + fn with_slice_mut(&mut self, with: impl FnOnce(&mut [T]) -> O) -> O; +} + +impl, D: Dimension> ArrayDataMutExt for ArrayBase { + fn with_slice_mut(&mut self, with: impl FnOnce(&mut [T]) -> O) -> O { + if let Some(slice) = self.as_slice_mut() { + with(slice) + } else { + let mut vec: Vec = self.into_iter().map(|x| *x).collect::>(); + + let result = with(vec.as_mut_slice()); + + self.iter_mut().zip(vec).for_each(|(x, x_new)| *x = x_new); + result + } + } +} + +impl, D: Dimension> sealed::SealedArrayDataMutExt + for ArrayBase +{ +} + /// Array-representation support for all dtypes included in [`AnyArrayBase`]. #[expect(missing_docs)] pub trait AnyRawData { @@ -740,7 +762,7 @@ impl fmt::Display for AnyArrayDType { } /// Types which are included in [`AnyArrayDType`] -pub trait ArrayDType: crate::sealed::Sealed { +pub trait ArrayDType: sealed::SealedArrayDType { /// [`AnyArrayDType`] representation of this type const DTYPE: AnyArrayDType; @@ -751,7 +773,7 @@ pub trait ArrayDType: crate::sealed::Sealed { macro_rules! array_dtype { ($($dtype:ident($ty:ty)),*) => { $( - impl crate::sealed::Sealed for $ty {} + impl sealed::SealedArrayDType for $ty {} impl ArrayDType for $ty { const DTYPE: AnyArrayDType = AnyArrayDType::$dtype; @@ -788,3 +810,9 @@ pub enum AnyArrayAssignError { dst: Vec, }, } + +mod sealed { + pub trait SealedArrayDType: Copy {} + + pub trait SealedArrayDataMutExt {} +} diff --git a/crates/numcodecs/src/lib.rs b/crates/numcodecs/src/lib.rs index 4e5ff5ad3..b4db09d2b 100644 --- a/crates/numcodecs/src/lib.rs +++ b/crates/numcodecs/src/lib.rs @@ -24,13 +24,9 @@ mod codec; pub use array::{ AnyArcArray, AnyArray, AnyArrayAssignError, AnyArrayBase, AnyArrayDType, AnyArrayView, - AnyArrayViewMut, AnyCowArray, AnyRawData, ArrayDType, + AnyArrayViewMut, AnyCowArray, AnyRawData, ArrayDType, ArrayDataMutExt, }; pub use codec::{ Codec, DynCodec, DynCodecType, StaticCodec, StaticCodecConfig, StaticCodecType, StaticCodecVersion, codec_from_config_with_id, serialize_codec_config_with_id, }; - -mod sealed { - pub trait Sealed {} -}