From 4eb3232a7c4f8fa69b3cfcd0ee89863b627fcaf9 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Thu, 22 Apr 2021 20:01:24 +1000 Subject: [PATCH 01/19] Initial --- LICENSE | 2 +- README.md | 40 ++++++++++++++++---- rle/tests/test_encode.py | 0 setup.py | 7 +++- src/lib.rs | 81 ++++++++++++++++++++++++++++++++++++++-- 5 files changed, 115 insertions(+), 15 deletions(-) create mode 100644 rle/tests/test_encode.py diff --git a/LICENSE b/LICENSE index a49252d..62f3da2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 scaramallion +Copyright (c) 2020-2021 scaramallion Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 160c35c..d67a513 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ## pylibjpeg-rle -A fast DICOM RLE decoding plugin for pylibjpeg, written in Rust with a Python 3.6+ wrapper. +A fast DICOM ([PackBits](https://en.wikipedia.org/wiki/PackBits)) RLE plugin for [pylibjpeg](https://github.com/pydicom/pylibjpeg), written in Rust with a Python 3.6+ wrapper. Linux, OSX and Windows are all supported. @@ -22,10 +22,10 @@ python -m setup.py develop ``` ### Supported Transfer Syntaxes -#### Decoding -| UID | Description | -| --- | --- | -| 1.2.840.10008.1.2.5 | RLE Lossless | + +| UID | Description | Decoding | Encoding | +| --- | --- | --- | --- | +| 1.2.840.10008.1.2.5 | RLE Lossless | Yes | Yes | ### Benchmarks #### Decoding @@ -47,12 +47,31 @@ Time per 1000 decodes, pydicom's NumPy RLE handler vs. pylibjpeg-rle | SC_rgb_rle_32bit.dcm | 10,000 | 120,000 | 0.56 s | 0.19 s | | SC_rgb_rle_32bit_2frame.dcm | 20,000 | 240,000 | 1.03 s | 0.28 s | +#### Encoding + +Time per 1000 encodes + +| Dataset | Pixels | Bytes | NumPy | pylibjpeg-rle | +| --- | --- | --- | --- | --- | +| OBXXXX1A_rle.dcm | 480,000 | 480,000 | s | s | +| OBXXXX1A_rle_2frame.dcm | 960,000 | 960,000 | s | s | +| SC_rgb_rle.dcm | 10,000 | 30,000 | s | s | +| SC_rgb_rle_2frame.dcm | 20,000 | 60,000 | s | s | +| MR_small_RLE.dcm | 4,096 | 8,192 | s | s | +| emri_small_RLE.dcm | 40,960 | 81,920 | s | s | +| SC_rgb_rle_16bit.dcm | 10,000 | 60,000 | s | s | +| SC_rgb_rle_16bit_2frame.dcm | 20,000 | 120,000 | s | s | +| rtdose_rle_1frame.dcm | 100 | 400 | s | s | +| rtdose_rle.dcm | 1,500 | 6,000 | s | s | +| SC_rgb_rle_32bit.dcm | 10,000 | 120,000 | s | s | +| SC_rgb_rle_32bit_2frame.dcm | 20,000 | 240,000 | s | s | + ### Usage -#### With pylibjpeg and pydicom +#### Decoding +##### With pylibjpeg Because pydicom defaults to the NumPy RLE decoder, you must specify the use -of pylibjpeg when decompressing: -*Pixel Data*: +of pylibjpeg when decompressing (**note: requires pydicom v2.2+**): ```python from pydicom import dcmread from pydicom.data import get_testdata_file @@ -62,6 +81,7 @@ ds.decompress("pylibjpeg") arr = ds.pixel_array ``` +##### Standalone with pydicom Alternatively you can use the included functions to decode a given dataset: ```python from rle import pixel_array, generate_frames @@ -74,3 +94,7 @@ arr = pixel_array(ds) for arr in generate_frames(ds): print(arr.shape) ``` + +#### Encoding +##### With pylibjpeg +##### Standalone with pydicom diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py index 5ffa245..1ef62be 100644 --- a/setup.py +++ b/setup.py @@ -20,8 +20,8 @@ setup( name = 'pylibjpeg-rle', description = ( - "Python bindings for a fast RLE decoder, with a focus on use as a " - "plugin for pylibjpeg" + "Python bindings for a fast RLE decoder/encoder, with a focus on " + "use as a plugin for pylibjpeg" ), long_description = long_description, long_description_content_type = 'text/markdown', @@ -71,5 +71,8 @@ 'pylibjpeg.pixel_data_decoders': [ "1.2.840.10008.1.2.5 = rle:decode_pixel_data", ], + 'pylibjpeg.pixel_data_encoders': [ + "1.2.840.10008.1.2.5 = rle:encode_pixel_data", + ], }, ) diff --git a/src/lib.rs b/src/lib.rs index 6bd9779..c6c2d4a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,3 @@ -// https://pyo3.rs/v0.13.2/conversions/tables.html -// bytes -> &[u8] or Vec -// bytearray -> Vec -// list[T] -> Vec use std::error::Error; use std::convert::TryFrom; @@ -12,6 +8,8 @@ use pyo3::types::{PyBytes, PyByteArray}; use pyo3::exceptions::{PyValueError}; +// RLE Decoding + #[pyfunction] fn parse_header(enc: &[u8]) -> PyResult> { /* Return the segment offsets from the RLE header. @@ -554,11 +552,86 @@ fn _decode_segment(enc: &[u8], out: &mut Vec) -> Result<(), Box> } +// RLE Encoding +#[pyfunction] +fn encode_frame() {} + + +fn _encode_segment() -> Result<(), Box> { + Ok(()) +} + + +fn _encode_row(s: &[u8]) -> Result<(Vec), Box> { + /* Return a numpy array as an RLE encoded bytearray. + + Parameters + ---------- + s + The bytes to be encoded. + + Returns + ------- + Vec + The RLE encoded row, following the format specified by the DICOM + Standard, Part 5, :dcm:`Annex G` + + Notes + ----- + * 2-byte repeat runs are always encoded as Replicate Runs rather than + only when not preceeded by a Literal Run as suggested by the Standard. + */ + // Read ahead and find first non-identical value + + // out = [] + // out_append = out.append + // out_extend = out.extend + // + // literal = [] + // for key, group in groupby(arr.astype('uint8').tolist()): + // group = list(group) + // if len(group) == 1: + // literal.append(group[0]) + // else: + // if literal: + // # Literal runs + // for ii in range(0, len(literal), 128): + // _run = literal[ii:ii + 128] + // out_append(len(_run) - 1) + // out_extend(_run) + // + // literal = [] + // + // # Replicate run + // for ii in range(0, len(group), 128): + // if len(group[ii:ii + 128]) > 1: + // # Replicate run + // out_append(257 - len(group[ii:ii + 128])) + // out_append(group[0]) + // else: + // # Literal run only if last replicate part is length 1 + // out_append(0) + // out_append(group[0]) + // + // # Final literal run if literal isn't followed by a replicate run + // for ii in range(0, len(literal), 128): + // _run = literal[ii:ii + 128] + // out_append(len(_run) - 1) + // out_extend(_run) + // + // return pack('{}B'.format(len(out)), *out) + + Ok(()) +} + + #[pymodule] fn _rle(_: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(parse_header, m)?).unwrap(); m.add_function(wrap_pyfunction!(decode_segment, m)?).unwrap(); m.add_function(wrap_pyfunction!(decode_frame, m)?).unwrap(); + m.add_function(wrap_pyfunction!(encode_frame, m)?).unwrap(); + Ok(()) } From 21a7713cc84efd002ddabaedc784bb6293fdcfa3 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Fri, 23 Apr 2021 16:19:42 +1000 Subject: [PATCH 02/19] Add tests, encoding WIP skeleton --- rle/tests/test_encode.py | 529 +++++++++++++++++++++++++++++++++++++++ src/lib.rs | 115 ++++++++- 2 files changed, 640 insertions(+), 4 deletions(-) diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py index e69de29..c49dfc5 100644 --- a/rle/tests/test_encode.py +++ b/rle/tests/test_encode.py @@ -0,0 +1,529 @@ +"""Tests for RLE encoding.""" + +import numpy as np +import pytest + +from rle._rle import encode_row + + +# Tests for RLE encoding +REFERENCE_ENCODE_ROW = [ + # Input, output + pytest.param([], b'', id='1'), + # Replicate run tests + # 2 (min) replicate + pytest.param([0] * 2, b'\xff\x00', id='1'), + pytest.param([0] * 3, b'\xfe\x00', id='2'), + pytest.param([0] * 64, b'\xc1\x00', id='3'), + pytest.param([0] * 127, b'\x82\x00', id='4'), + # 128 (max) replicate + pytest.param([0] * 128, b'\x81\x00', id='5'), + # 128 (max) replicate, 1 (min) literal + pytest.param([0] * 129, b'\x81\x00\x00\x00', id='6'), + # 128 (max) replicate, 2 (min) replicate + pytest.param([0] * 130, b'\x81\x00\xff\x00', id='7'), + # 128 (max) x 5 replicates + pytest.param([0] * 128 * 5, b'\x81\x00' * 5, id='8'), + # Literal run tests + # 1 (min) literal + pytest.param([0], b'\x00\x00', id='9'), + pytest.param([0, 1], b'\x01\x00\x01', id='10'), + pytest.param([0, 1, 2], b'\x02\x00\x01\x02', id='11'), + pytest.param([0, 1] * 32, b'\x3f' + b'\x00\x01' * 32, id='12'), + # 127 literal + pytest.param([0, 1] * 63 + [2], b'\x7e' + b'\x00\x01' * 63 + b'\x02', id='13'), + # 128 literal (max) + pytest.param([0, 1] * 64, b'\x7f' + b'\x00\x01' * 64, id='14'), + # 128 (max) literal, 1 (min) literal + pytest.param([0, 1] * 64 + [2], b'\x7f' + b'\x00\x01' * 64 + b'\x00\x02', id='15'), + # 128 (max) x 5 literals + pytest.param([0, 1] * 64 * 5, (b'\x7f' + b'\x00\x01' * 64) * 5, id='16'), + # Combination run tests + # 1 (min) literal, 1 (min) replicate + pytest.param([0, 1, 1], b'\x00\x00\xff\x01', id='17'), + # 1 (min) literal, 128 (max) replicate + pytest.param([0] + [1] * 128, b'\x00\x00\x81\x01', id='18'), + # 128 (max) literal, 2 (min) replicate + pytest.param([0, 1] * 64 + [2] * 2, b'\x7f' + b'\x00\x01' * 64 + b'\xff\x02', id='19'), + # 128 (max) literal, 128 (max) replicate + pytest.param([0, 1] * 64 + [2] * 128, b'\x7f' + b'\x00\x01' * 64 + b'\x81\x02', id='20'), + # 2 (min) replicate, 1 (min) literal + pytest.param([0, 0, 1], b'\xff\x00\x00\x01', id='21'), + # 2 (min) replicate, 128 (max) literal + pytest.param([0, 0] + [1, 2] * 64, b'\xff\x00\x7f' + b'\x01\x02' * 64, id='22'), + # 128 (max) replicate, 1 (min) literal + pytest.param([0] * 128 + [1], b'\x81\x00\x00\x01', id='23'), + # 128 (max) replicate, 128 (max) literal + pytest.param([0] * 128 + [1, 2] * 64, b'\x81\x00\x7f' + b'\x01\x02' * 64, id='24'), +] + + +class TestEncodeRow: + """Tests for _rle.encode_row.""" + @pytest.mark.parametrize('row, ref', REFERENCE_ENCODE_ROW) + def test_encode(self, row, ref): + """Test encoding an empty row.""" + row = np.asarray(row) + assert ref == encode_row(row.tobytes()) + + +@pytest.mark.skip() +class TestEncodeSegment: + """Tests for rle_handler._rle_encode_segment.""" + def test_one_row(self): + """Test encoding data that contains only a single row.""" + ds = dcmread(RLE_8_1_1F) + pixel_data = defragment_data(ds.PixelData) + decoded = _rle_decode_segment(pixel_data[64:]) + assert ds.Rows * ds.Columns == len(decoded) + arr = np.frombuffer(decoded, 'uint8').reshape(ds.Rows, ds.Columns) + + # Re-encode a single row of the decoded data + row = arr[0] + assert (ds.Columns,) == row.shape + encoded = _rle_encode_segment(row) + + # Decode the re-encoded data and check that it's the same + redecoded = _rle_decode_segment(encoded) + assert ds.Columns == len(redecoded) + assert decoded[:ds.Columns] == redecoded + + def test_cycle(self): + """Test the decoded data remains the same after encoding/decoding.""" + ds = dcmread(RLE_8_1_1F) + pixel_data = defragment_data(ds.PixelData) + decoded = _rle_decode_segment(pixel_data[64:]) + assert ds.Rows * ds.Columns == len(decoded) + arr = np.frombuffer(decoded, 'uint8').reshape(ds.Rows, ds.Columns) + # Re-encode the decoded data + encoded = _rle_encode_segment(arr) + + # Decode the re-encoded data and check that it's the same + redecoded = _rle_decode_segment(encoded) + assert ds.Rows * ds.Columns == len(redecoded) + assert decoded == redecoded + + +@pytest.mark.skip() +class TestEncodePlane: + """Tests for rle_handler._rle_encode_plane.""" + def test_8bit(self): + """Test encoding an 8-bit plane into 1 segment.""" + ds = dcmread(RLE_8_1_1F) + pixel_data = defragment_data(ds.PixelData) + decoded = _rle_decode_frame(pixel_data, ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + assert ds.Rows * ds.Columns * ds.BitsAllocated // 8 == len(decoded) + arr = np.frombuffer(decoded, 'uint8').reshape(ds.Rows, ds.Columns) + # Re-encode the decoded data + encoded = bytearray() + nr_segments = 0 + for segment in _rle_encode_plane(arr): + encoded.extend(segment) + nr_segments += 1 + + # Add header + header = b'\x01\x00\x00\x00\x40\x00\x00\x00' + header += b'\x00' * (64 - len(header)) + + assert 1 == nr_segments + + # Decode the re-encoded data and check that it's the same + redecoded = _rle_decode_frame(header + encoded, + ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + assert ds.Rows * ds.Columns * ds.SamplesPerPixel == len(redecoded) + assert decoded == redecoded + + def test_16bit(self): + """Test encoding a 16-bit plane into 2 segments.""" + ds = dcmread(RLE_16_1_1F) + pixel_data = defragment_data(ds.PixelData) + decoded = _rle_decode_frame(pixel_data, ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + assert ds.Rows * ds.Columns * ds.BitsAllocated // 8 == len(decoded) + + # `decoded` is in big endian byte ordering + dtype = np.dtype('uint16').newbyteorder('>') + arr = np.frombuffer(decoded, dtype).reshape(ds.Rows, ds.Columns) + + # Re-encode the decoded data + encoded = bytearray() + nr_segments = 0 + offsets = [64] + for segment in _rle_encode_plane(arr): + offsets.append(offsets[nr_segments] + len(segment)) + encoded.extend(segment) + nr_segments += 1 + + assert 2 == nr_segments + + # Add header + header = b'\x02\x00\x00\x00' + header += pack('<2L', *offsets[:-1]) + header += b'\x00' * (64 - len(header)) + + # Decode the re-encoded data and check that it's the same + redecoded = _rle_decode_frame(header + encoded, + ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + assert ds.Rows * ds.Columns * ds.BitsAllocated // 8 == len(redecoded) + assert decoded == redecoded + + def test_16bit_segment_order(self): + """Test that the segment order per 16-bit sample is correct.""" + # Native byte ordering + data = b'\x00\x00\x01\xFF\xFE\x00\xFF\xFF\x10\x12' + dtype = np.dtype('uint16') + arr = np.frombuffer(data, dtype) + + segments = [] + for segment in _rle_encode_plane(arr): + segments.append(segment) + + assert 2 == len(segments) + + # Each segment should start with a literal run marker of 0x04 + # and MSB should be first segment, then LSB in second + if sys.byteorder == 'little': + assert b'\x04\x00\xFF\x00\xFF\x12' == segments[0] + assert b'\x04\x00\x01\xFE\xFF\x10' == segments[1] + else: + assert b'\x04\x00\x01\xFE\xFF\x10' == segments[0] + assert b'\x04\x00\xFF\x00\xFF\x12' == segments[1] + + # Little endian + arr = np.frombuffer(data, dtype.newbyteorder('<')) + assert [0, 65281, 254, 65535, 4624] == arr.tolist() + + segments = [] + for segment in _rle_encode_plane(arr): + segments.append(segment) + + assert 2 == len(segments) + assert b'\x04\x00\xFF\x00\xFF\x12' == segments[0] + assert b'\x04\x00\x01\xFE\xFF\x10' == segments[1] + + # Big endian + arr = np.frombuffer(data, dtype.newbyteorder('>')) + assert [0, 511, 65024, 65535, 4114] == arr.tolist() + + segments = [] + for segment in _rle_encode_plane(arr): + segments.append(segment) + + assert 2 == len(segments) + assert b'\x04\x00\x01\xFE\xFF\x10' == segments[0] + assert b'\x04\x00\xFF\x00\xFF\x12' == segments[1] + + def test_32bit(self): + """Test encoding a 32-bit plane into 4 segments.""" + ds = dcmread(RLE_32_1_1F) + pixel_data = defragment_data(ds.PixelData) + decoded = _rle_decode_frame(pixel_data, ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + assert ds.Rows * ds.Columns * ds.BitsAllocated // 8 == len(decoded) + + # `decoded` is in big endian byte ordering + dtype = np.dtype('uint32').newbyteorder('>') + arr = np.frombuffer(decoded, dtype).reshape(ds.Rows, ds.Columns) + + # Re-encode the decoded data + encoded = bytearray() + nr_segments = 0 + offsets = [64] + for segment in _rle_encode_plane(arr): + offsets.append(offsets[nr_segments] + len(segment)) + encoded.extend(segment) + nr_segments += 1 + + assert 4 == nr_segments + + # Add header + header = b'\x04\x00\x00\x00' + header += pack('<4L', *offsets[:-1]) + header += b'\x00' * (64 - len(header)) + + # Decode the re-encoded data and check that it's the same + redecoded = _rle_decode_frame(header + encoded, + ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + assert ds.Rows * ds.Columns * ds.BitsAllocated // 8 == len(redecoded) + assert decoded == redecoded + + def test_32bit_segment_order(self): + """Test that the segment order per 32-bit sample is correct.""" + # Native byte ordering + data = b'\x00\x00\x00\x00\x01\xFF\xFE\x0A\xFF\xFC\x10\x12' + dtype = np.dtype('uint32') + arr = np.frombuffer(data, dtype) + + segments = [] + for segment in _rle_encode_plane(arr): + segments.append(segment) + + assert 4 == len(segments) + + # Each segment should start with a literal run marker of 0x02 + if sys.byteorder == 'little': + assert b'\x02\x00\x0A\x12' == segments[0] + assert b'\x02\x00\xFE\x10' == segments[1] + assert b'\x02\x00\xFF\xFC' == segments[2] + assert b'\x02\x00\x01\xFF' == segments[3] + else: + assert b'\x02\x00\x01\xFF' == segments[0] + assert b'\x02\x00\xFF\xFC' == segments[1] + assert b'\x02\x00\xFE\x10' == segments[2] + assert b'\x02\x00\x0A\x12' == segments[3] + + # Little endian + arr = np.frombuffer(data, dtype.newbyteorder('<')) + assert [0, 184483585, 303103231] == arr.tolist() + + segments = [] + for segment in _rle_encode_plane(arr): + segments.append(segment) + + assert 4 == len(segments) + assert b'\x02\x00\x0A\x12' == segments[0] + assert b'\x02\x00\xFE\x10' == segments[1] + assert b'\x02\x00\xFF\xFC' == segments[2] + assert b'\x02\x00\x01\xFF' == segments[3] + + # Big endian + arr = np.frombuffer(data, dtype.newbyteorder('>')) + assert [0, 33553930, 4294709266] == arr.tolist() + + segments = [] + for segment in _rle_encode_plane(arr): + segments.append(segment) + + assert 4 == len(segments) + assert b'\x02\x00\x01\xFF' == segments[0] + assert b'\x02\x00\xFF\xFC' == segments[1] + assert b'\x02\x00\xFE\x10' == segments[2] + assert b'\x02\x00\x0A\x12' == segments[3] + + def test_padding(self): + """Test that odd length encoded segments are padded.""" + data = b'\x00\x04\x01\x15' + arr = np.frombuffer(data, 'uint8') + segments = [] + for segment in _rle_encode_plane(arr): + segments.append(segment) + + # The segment should start with a literal run marker of 0x03 + # then 4 bytes of RLE encoded data, then 0x00 padding + assert b'\x03\x00\x04\x01\x15\x00' == segments[0] + + +@pytest.mark.skip() +class TestNumpy_RLEEncodeFrame: + """Tests for rle_handler.rle_encode_frame.""" + def setup(self): + """Setup the tests.""" + # Create a dataset skeleton for use in the cycle tests + ds = Dataset() + ds.file_meta = FileMetaDataset() + ds.file_meta.TransferSyntaxUID = '1.2.840.10008.1.2' + ds.Rows = 2 + ds.Columns = 4 + ds.SamplesPerPixel = 3 + ds.PlanarConfiguration = 1 + self.ds = ds + + def test_cycle_8bit_1sample(self): + """Test an encode/decode cycle for 8-bit 1 sample/pixel.""" + ds = dcmread(EXPL_8_1_1F) + ref = ds.pixel_array + assert 8 == ds.BitsAllocated + assert 1 == ds.SamplesPerPixel + + encoded = rle_encode_frame(ref) + decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + dtype = np.dtype('uint8').newbyteorder('>') + arr = np.frombuffer(decoded, dtype) + arr = reshape_pixel_array(ds, arr) + + assert np.array_equal(ref, arr) + + def test_cycle_8bit_3sample(self): + """Test an encode/decode cycle for 8-bit 3 sample/pixel.""" + ds = dcmread(EXPL_8_3_1F) + ref = ds.pixel_array + assert 8 == ds.BitsAllocated + assert 3 == ds.SamplesPerPixel + + encoded = rle_encode_frame(ref) + decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + arr = np.frombuffer(decoded, 'uint8') + # The decoded data is planar configuration 1 + ds.PlanarConfiguration = 1 + arr = reshape_pixel_array(ds, arr) + + assert np.array_equal(ref, arr) + + def test_cycle_16bit_1sample(self): + """Test an encode/decode cycle for 16-bit 1 sample/pixel.""" + ds = dcmread(EXPL_16_1_1F) + ref = ds.pixel_array + assert 16 == ds.BitsAllocated + assert 1 == ds.SamplesPerPixel + + encoded = rle_encode_frame(ref) + decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + dtype = np.dtype('uint16').newbyteorder('>') + arr = np.frombuffer(decoded, dtype) + arr = reshape_pixel_array(ds, arr) + + assert np.array_equal(ref, arr) + + def test_cycle_16bit_3sample(self): + """Test an encode/decode cycle for 16-bit 3 sample/pixel.""" + ds = dcmread(EXPL_16_3_1F) + ref = ds.pixel_array + assert 16 == ds.BitsAllocated + assert 3 == ds.SamplesPerPixel + + encoded = rle_encode_frame(ref) + decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + dtype = np.dtype('uint16').newbyteorder('>') + arr = np.frombuffer(decoded, dtype) + # The decoded data is planar configuration 1 + ds.PlanarConfiguration = 1 + arr = reshape_pixel_array(ds, arr) + + assert np.array_equal(ref, arr) + + def test_cycle_32bit_1sample(self): + """Test an encode/decode cycle for 32-bit 1 sample/pixel.""" + ds = dcmread(EXPL_32_1_1F) + ref = ds.pixel_array + assert 32 == ds.BitsAllocated + assert 1 == ds.SamplesPerPixel + + encoded = rle_encode_frame(ref) + decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + dtype = np.dtype('uint32').newbyteorder('>') + arr = np.frombuffer(decoded, dtype) + arr = reshape_pixel_array(ds, arr) + + assert np.array_equal(ref, arr) + + def test_cycle_32bit_3sample(self): + """Test an encode/decode cycle for 32-bit 3 sample/pixel.""" + ds = dcmread(EXPL_32_3_1F) + ref = ds.pixel_array + assert 32 == ds.BitsAllocated + assert 3 == ds.SamplesPerPixel + + encoded = rle_encode_frame(ref) + decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, + ds.SamplesPerPixel, ds.BitsAllocated) + dtype = np.dtype('uint32').newbyteorder('>') + arr = np.frombuffer(decoded, dtype) + # The decoded data is planar configuration 1 + ds.PlanarConfiguration = 1 + arr = reshape_pixel_array(ds, arr) + + assert np.array_equal(ref, arr) + + def test_16_segments_raises(self): + """Test that trying to encode 16-segments raises exception.""" + arr = np.asarray([[[1, 2, 3, 4]]], dtype='uint32') + assert (1, 1, 4) == arr.shape + assert 4 == arr.dtype.itemsize + + msg = ( + r"Unable to encode as the DICOM standard only allows " + r"a maximum of 15 segments in RLE encoded data" + ) + with pytest.raises(ValueError, match=msg): + rle_encode_frame(arr) + + def test_15_segment(self): + """Test encoding 15-segments works as expected.""" + arr = np.asarray( + [[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]], + dtype='uint8' + ) + assert (1, 1, 15) == arr.shape + assert 1 == arr.dtype.itemsize + + encoded = rle_encode_frame(arr) + header = ( + b'\x0f\x00\x00\x00' + b'\x40\x00\x00\x00' + b'\x42\x00\x00\x00' + b'\x44\x00\x00\x00' + b'\x46\x00\x00\x00' + b'\x48\x00\x00\x00' + b'\x4a\x00\x00\x00' + b'\x4c\x00\x00\x00' + b'\x4e\x00\x00\x00' + b'\x50\x00\x00\x00' + b'\x52\x00\x00\x00' + b'\x54\x00\x00\x00' + b'\x56\x00\x00\x00' + b'\x58\x00\x00\x00' + b'\x5a\x00\x00\x00' + b'\x5c\x00\x00\x00' + ) + assert header == encoded[:64] + assert ( + b'\x00\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06' + b'\x00\x07\x00\x08\x00\x09\x00\x0a\x00\x0b\x00\x0c' + b'\x00\x0d\x00\x0e\x00\x0f' + ) == encoded[64:] + + def test_encoding_multiple_frames_raises(self): + """Test encoding multiple framed pixel data raises exception.""" + # Note: only works with multi-sample data + ds = dcmread(EXPL_8_3_2F) + arr = ds.pixel_array + assert ds.NumberOfFrames > 1 + assert len(arr.shape) == 4 + msg = ( + r"Unable to encode multiple frames at once, please encode one " + r"frame at a time" + ) + with pytest.raises(ValueError, match=msg): + rle_encode_frame(arr) + + def test_single_row_1sample(self): + """Test encoding a single row of 1 sample/pixel data.""" + # Rows 1, Columns 5, SamplesPerPixel 1 + arr = np.asarray([[0, 1, 2, 3, 4]], dtype='uint8') + assert (1, 5) == arr.shape + encoded = rle_encode_frame(arr) + header = b'\x01\x00\x00\x00\x40\x00\x00\x00' + b'\x00' * 56 + assert header == encoded[:64] + assert b'\x04\x00\x01\x02\x03\x04' == encoded[64:] + + def test_single_row_3sample(self): + """Test encoding a single row of 3 samples/pixel data.""" + # Rows 1, Columns 5, SamplesPerPixel 3 + arr = np.asarray( + [[[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3], [4, 4, 4]]], + dtype='uint8' + ) + assert (1, 5, 3) == arr.shape + encoded = rle_encode_frame(arr) + header = ( + b'\x03\x00\x00\x00' + b'\x40\x00\x00\x00' + b'\x46\x00\x00\x00' + b'\x4c\x00\x00\x00' + ) + header += b'\x00' * (64 - len(header)) + assert header == encoded[:64] + assert ( + b'\x04\x00\x01\x02\x03\x04' + b'\x04\x00\x01\x02\x03\x04' + b'\x04\x00\x01\x02\x03\x04' + ) == encoded[64:] diff --git a/src/lib.rs b/src/lib.rs index 9d2420d..85bcce8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -559,17 +559,44 @@ fn encode_frame() {} fn _encode_segment() -> Result<(), Box> { + // Each segment must be even length or padded to even length with noop byte Ok(()) } -fn _encode_row(s: &[u8]) -> Result<(Vec), Box> { +#[pyfunction] +fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { + /* Return RLE encoded Python bytes. + + Parameters + ---------- + src : bytes + The raw data to be encoded. + + Returns + ------- + bytes + The RLE encoded data. + */ + // Be nice to make use of threading for row encoding + // The encoded row data + let mut dst: Vec = Vec::with_capacity(raw.len()); + match _decode_segment(src, &mut dst) { + Ok(()) => return Ok(PyBytes::new(py, &dst[..])), + Err(err) => return Err(PyValueError::new_err(err.to_string())), + } +} + + +fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { /* Return a numpy array as an RLE encoded bytearray. Parameters ---------- - s - The bytes to be encoded. + src + The data to be encoded + dst + The destination for the encoded data Returns ------- @@ -582,7 +609,86 @@ fn _encode_row(s: &[u8]) -> Result<(Vec), Box> { * 2-byte repeat runs are always encoded as Replicate Runs rather than only when not preceeded by a Literal Run as suggested by the Standard. */ - // Read ahead and find first non-identical value + + // Reminders: + // * Each image row is encoded separately + // * Literal runs are a non-repetitive stream + // * Replicate runs are a repetitive stream + // * Maximum length of literal/replicate runs is 128 bytes + + // Maximum length of a literal/replicate run + let MAX_RUN: u8 = 128; + // Track how long the current literal run is + let mut literal: i8 = 0; + // Track how long the current replicate run is + let mut replicate: i8 = 0; + + let MAX_SRC = src.len(); + + let mut ii: usize = 0; + let mut previous: u8 = *src[ii]; // Initial value + + // Check there is an initial value (or two) + + loop { + ii += 1; + current = *src[ii]; + + println!( + "ii: {}, prev: {}, cur: {}, l: {}, r: {}", + ii, previous, current, literal, replicate + ); + + // While we still have data in src... + // For each item in src... + + // Literal Run + // ----------- + if current = previous { + if literal > 0 { + // + } + } + // if currently in literal run... + // if the current value is still different to previous: + // continue literal run + // else + // if literal > 2?: + // write out literal + // reset run + // else: + // switch to replicate + if literal == MAX_RUN { + // Write out run and reset + dst.push(MAX_RUN); // Er..? Convert? + dst.extend(&src[a..b]); // or something like that + literal = 0; + replicate = 0; + } + + // Replicate Run + // ----------- + // if currently in replicate run... + // if the current value is the same as the previous value: + // continue replicate run + // else + // if replicate > 2?; + // write out run + // reset + // else: + // switch to literal + if replicate == MAX_RUN { + // Write out run and reset + dst.push(MAX_RUN); // Er..? Convert ...maybe 257 - 128? + dst.push(current); + literal = 0; + replicate = 0; + } + // else reset and start literal run + // literal run + + if ii = MAX_SRC { return Ok(()) } + } // out = [] // out_append = out.append @@ -632,6 +738,7 @@ fn _rle(_: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(decode_segment, m)?).unwrap(); m.add_function(wrap_pyfunction!(decode_frame, m)?).unwrap(); + m.add_function(wrap_pyfunction!(encode_row, m)?).unwrap(); m.add_function(wrap_pyfunction!(encode_frame, m)?).unwrap(); Ok(()) From 8c11c9f54206f14d086f74c78caedc6d565aa8f6 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sat, 24 Apr 2021 09:43:42 +1000 Subject: [PATCH 03/19] More WIP logic --- src/lib.rs | 242 ++++++++++++----------------------------------------- 1 file changed, 53 insertions(+), 189 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 85bcce8..d952bbb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -248,151 +248,6 @@ fn _decode_frame( } -// About twice as slow as _decode_frame -fn _decode_frame_alt( - enc: &[u8], px_per_sample: u32, bits_per_px: u8 -) -> Result, Box> { - /* Return the decoded frame. - - Parameters - ---------- - enc - The RLE encoded frame. - px_per_sample - The number of pixels per sample (rows x columns), maximum (2^32 - 1). - bits_per_px - The number of bits per pixel, should be a multiple of 8 and no larger than 64. - */ - - // Pre-define our errors for neatness - let err_bits_zero = Err( - String::from("The 'Bits Allocated' value must be greater than 0").into(), - ); - let err_bits_not_octal = Err( - String::from("The 'Bits Allocated' value must be a multiple of 8").into(), - ); - let err_invalid_bytes = Err( - String::from("A 'Bits Allocated' value greater than 64 is not supported").into() - ); - let err_invalid_offset = Err( - String::from("Invalid segment offset found in the RLE header").into() - ); - let err_insufficient_data = Err( - String::from("Frame is not long enough to contain RLE encoded data").into() - ); - let err_invalid_nr_samples = Err( - String::from("The 'Samples Per Pixel' must be 1 or 3").into() - ); - let err_segment_length = Err( - String::from("The decoded segment length does not match the expected length").into() - ); - - // Ensure we have a valid bits/px value - match bits_per_px { - 0 => return err_bits_zero, - _ => match bits_per_px % 8 { - 0 => {}, - _ => return err_bits_not_octal - } - } - - // Ensure `bytes_per_pixel` is in [1, 8] - let bytes_per_pixel: u8 = bits_per_px / 8; - if bytes_per_pixel > 8 { return err_invalid_bytes } - - // Parse the RLE header and check results - // -------------------------------------- - // Ensure we have at least enough data for the RLE header - let encoded_length = enc.len(); - if encoded_length < 64 { return err_insufficient_data } - - let header = <&[u8; 64]>::try_from(&enc[0..64]).unwrap(); - let all_offsets: [u32; 15] = _parse_header(header); - - // Ensure we have at least enough encoded data to hit the segment offsets - let max_offset = *all_offsets.iter().max().unwrap() as usize; - if max_offset > encoded_length - 2 { return err_invalid_offset } - - // Get non-zero offsets and determine the number of segments - let mut nr_segments: u8 = 0; // `nr_segments` is in [0, 15] - let mut offsets: Vec = Vec::with_capacity(15); - for val in all_offsets.iter().filter(|&n| *n != 0) { - offsets.push(*val); - nr_segments += 1u8; - } - - // First offset must always be 64 - if offsets[0] != 64 { return err_invalid_offset } - - // Ensure we have a final ending offset at the end of the data - offsets.push(u32::try_from(encoded_length).unwrap()); - - // Ensure offsets are in increasing order - let mut last: u32 = 0; - for val in offsets.iter() { - if *val <= last { - return err_invalid_offset - } - last = *val; - } - - // Check the samples per pixel is conformant - let samples_per_px: u8 = nr_segments / bytes_per_pixel; - match samples_per_px { - 1 => {}, - 3 => {}, - _ => return err_invalid_nr_samples - } - - // Watch for overflow here; u32 * u32 -> u64 - let expected_length = usize::try_from( - px_per_sample * u32::from(bytes_per_pixel * samples_per_px) - ).unwrap(); - - // Pre-allocate a vector for the decoded frame - let mut frame = vec![0u8; expected_length]; - - // Decode each segment and place it into the vector - // ------------------------------------------------ - let bpp = usize::from(bytes_per_pixel); - let pps = usize::try_from(px_per_sample).unwrap(); - // Concatenate sample planes into a frame - for sample in 0..samples_per_px { // 0 or (0, 1, 2) - // Sample offset - let so = usize::from(sample * bytes_per_pixel) * pps; - - // Interleave the segments into a sample plane - for byte_offset in 0..bytes_per_pixel { // 0, [1, 2, 3, ..., 7] - // idx should be in range [0, 23], but max is 15 - let idx = usize::from(sample * bytes_per_pixel + byte_offset); - - // offsets[idx] is u32 -> usize not guaranteed - let start = usize::try_from(offsets[idx]).unwrap(); - let end = usize::try_from(offsets[idx + 1]).unwrap(); - - // Pre-allocate a vector for the decoded segment - let mut segment = Vec::with_capacity(pps); - // Decode the segment - _decode_segment( - <&[u8]>::try_from(&enc[start..end]).unwrap(), - &mut segment - )?; - - // Check decoded segment length is good - if segment.len() != pps { return err_segment_length } - - // Interleave segment into frame - let bo = usize::from(byte_offset) + so; - for (ii, v) in segment.iter().enumerate() { - frame[ii * bpp + bo] = *v; - } - } - } - - Ok(frame) -} - - fn _decode_segment_into_frame( enc: &[u8], frame: &mut Vec, bpp: usize, initial_offset: usize ) -> Result> { @@ -566,7 +421,7 @@ fn _encode_segment() -> Result<(), Box> { #[pyfunction] fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { - /* Return RLE encoded Python bytes. + /* Return RLE encoded data as Python bytes. Parameters ---------- @@ -589,7 +444,7 @@ fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { - /* Return a numpy array as an RLE encoded bytearray. + /* RLE encode `src` into `dst` Parameters ---------- @@ -597,17 +452,6 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { The data to be encoded dst The destination for the encoded data - - Returns - ------- - Vec - The RLE encoded row, following the format specified by the DICOM - Standard, Part 5, :dcm:`Annex G` - - Notes - ----- - * 2-byte repeat runs are always encoded as Replicate Runs rather than - only when not preceeded by a Literal Run as suggested by the Standard. */ // Reminders: @@ -639,16 +483,35 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { ii, previous, current, literal, replicate ); - // While we still have data in src... - // For each item in src... - - // Literal Run - // ----------- - if current = previous { - if literal > 0 { - // + // Run type switching/control + // -------------------------- + if current == previous { + // If current == previous: + // if literal run, write out data and reset + // else increment replicate run + if literal > 0 { // Should be a run of at least 2? + // Write out and reset + dst.push(literal); + dst.push(&src[a..b]); + literal = 0; } + replicate += 1; + } else { + // If current != previous + // if replicate run, write out data and reset + // else increment literal run + if replicate > 0 { + // write out and reset + dst.push(replicate); // Something like that + dst.push(current); + replicate = 0; + } + literal += 1; } + + // OK, to reach here run length must be > 0 (obviously) + + // if currently in literal run... // if the current value is still different to previous: // continue literal run @@ -658,38 +521,39 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // reset run // else: // switch to replicate - if literal == MAX_RUN { - // Write out run and reset - dst.push(MAX_RUN); // Er..? Convert? - dst.extend(&src[a..b]); // or something like that - literal = 0; - replicate = 0; - } - // Replicate Run - // ----------- - // if currently in replicate run... - // if the current value is the same as the previous value: - // continue replicate run - // else - // if replicate > 2?; - // write out run - // reset - // else: - // switch to literal + // If the run length is maxxed out, write out and reset + // Hmm, is it faster to combine the length check? Probably about the same if replicate == MAX_RUN { - // Write out run and reset - dst.push(MAX_RUN); // Er..? Convert ...maybe 257 - 128? + // FIXME + dst.push(MAX_RUN); dst.push(current); literal = 0; replicate = 0; + continue; + } else if replicate == MAX_RUN { + // FIXME + dst.push(MAX_RUN); // Er..? Convert? + dst.extend(&src[a..b]); // something like that + literal = 0; + replicate = 0; + continue; } - // else reset and start literal run - // literal run - if ii = MAX_SRC { return Ok(()) } + // At this point 0 < run length < MAX_RUN, so loop + // -------------------------------------- + if ii = MAX_SRC { break; } } + // No more data, finish up the last write operation + if replicate > 0 { + // Write out and exit + } else if literal > 0 { + // Write out and exit + } + + Ok(()) + // out = [] // out_append = out.append // out_extend = out.extend From b4546dfe720be8cac9aeede09292016c1b0b5ba6 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sat, 24 Apr 2021 10:07:26 +1000 Subject: [PATCH 04/19] More logic thinking --- src/lib.rs | 68 +++++++++++++++--------------------------------------- 1 file changed, 19 insertions(+), 49 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d952bbb..39c418b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -434,8 +434,12 @@ fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { The RLE encoded data. */ // Be nice to make use of threading for row encoding - // The encoded row data - let mut dst: Vec = Vec::with_capacity(raw.len()); + + // Assuming all literal runs, `dst` can never be greater than + // ceil(src.len() / 128) + src.len() + + // Close enough... + let mut dst: Vec = Vec::with_capacity(raw.len() + raw.len() / 128 + 1); match _decode_segment(src, &mut dst) { Ok(()) => return Ok(PyBytes::new(py, &dst[..])), Err(err) => return Err(PyValueError::new_err(err.to_string())), @@ -458,6 +462,7 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // * Each image row is encoded separately // * Literal runs are a non-repetitive stream // * Replicate runs are a repetitive stream + // * 3-byte repeats shall be encoded as replicate runs // * Maximum length of literal/replicate runs is 128 bytes // Maximum length of a literal/replicate run @@ -475,8 +480,8 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // Check there is an initial value (or two) loop { - ii += 1; - current = *src[ii]; + current = *src[ii + 1]; + ii += 1; // fixme println!( "ii: {}, prev: {}, cur: {}, l: {}, r: {}", @@ -492,9 +497,11 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { if literal > 0 { // Should be a run of at least 2? // Write out and reset dst.push(literal); - dst.push(&src[a..b]); + dst.push(&src[a..b]); // maybe b - 1 literal = 0; + // *switch to replicate run* } + // If switching lit -> repl this is `current` item replicate += 1; } else { // If current != previous @@ -503,10 +510,12 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { if replicate > 0 { // write out and reset dst.push(replicate); // Something like that - dst.push(current); + dst.push(previous); replicate = 0; + // *switch to literal run* } - literal += 1; + // If switching repl -> lit this is `current` item + literal = 1; } // OK, to reach here run length must be > 0 (obviously) @@ -522,7 +531,7 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // else: // switch to replicate - // If the run length is maxxed out, write out and reset + // If the run length is maxed out, write out and reset // Hmm, is it faster to combine the length check? Probably about the same if replicate == MAX_RUN { // FIXME @@ -545,53 +554,14 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { if ii = MAX_SRC { break; } } - // No more data, finish up the last write operation + // No more source data, finish up the last write operation if replicate > 0 { // Write out and exit } else if literal > 0 { // Write out and exit } - Ok(()) - - // out = [] - // out_append = out.append - // out_extend = out.extend - // - // literal = [] - // for key, group in groupby(arr.astype('uint8').tolist()): - // group = list(group) - // if len(group) == 1: - // literal.append(group[0]) - // else: - // if literal: - // # Literal runs - // for ii in range(0, len(literal), 128): - // _run = literal[ii:ii + 128] - // out_append(len(_run) - 1) - // out_extend(_run) - // - // literal = [] - // - // # Replicate run - // for ii in range(0, len(group), 128): - // if len(group[ii:ii + 128]) > 1: - // # Replicate run - // out_append(257 - len(group[ii:ii + 128])) - // out_append(group[0]) - // else: - // # Literal run only if last replicate part is length 1 - // out_append(0) - // out_append(group[0]) - // - // # Final literal run if literal isn't followed by a replicate run - // for ii in range(0, len(literal), 128): - // _run = literal[ii:ii + 128] - // out_append(len(_run) - 1) - // out_extend(_run) - // - // return pack('{}B'.format(len(out)), *out) - + // I think that's sufficient, now to fix it up... Ok(()) } From d4a64078f41da1328faad375264d23e4818df2dc Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sat, 24 Apr 2021 10:36:15 +1000 Subject: [PATCH 05/19] Start testing --- src/lib.rs | 105 +++++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 39c418b..3235e1a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -453,10 +453,18 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { Parameters ---------- src - The data to be encoded + The data to be encoded, must contain at least 2 items. dst - The destination for the encoded data + The destination for the encoded data. */ + let err_short_src = Err( + String::from( + "RLE encoding requires a segment row length of at least 2 bytes" + ).into(), + ); + + // Check we have the minimum required amount of data + if src.len() < 2 { return err_short_src } // Reminders: // * Each image row is encoded separately @@ -465,23 +473,27 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // * 3-byte repeats shall be encoded as replicate runs // * Maximum length of literal/replicate runs is 128 bytes + // Replicate run: dst += [count, value] + // count: number of bytes in the run (i8 = -replicate + 1) + // value: the value of the repeating byte + + // Literal run: dst += [count, a, b, c, ...] + // count: number of bytes in the literal stream (i8 = literal - 1) + // a, b, c, ...: the literal stream + // Maximum length of a literal/replicate run let MAX_RUN: u8 = 128; // Track how long the current literal run is - let mut literal: i8 = 0; + // TODO: Check if math is more efficient for i8 + let mut literal: usize = 0; // Track how long the current replicate run is - let mut replicate: i8 = 0; - - let MAX_SRC = src.len(); + let mut replicate: usize = 0; let mut ii: usize = 0; - let mut previous: u8 = *src[ii]; // Initial value - - // Check there is an initial value (or two) + let mut previous: u8 = *src[ii]; loop { current = *src[ii + 1]; - ii += 1; // fixme println!( "ii: {}, prev: {}, cur: {}, l: {}, r: {}", @@ -491,77 +503,66 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // Run type switching/control // -------------------------- if current == previous { - // If current == previous: - // if literal run, write out data and reset - // else increment replicate run + // If in a literal run then write and reset to replicate if literal > 0 { // Should be a run of at least 2? - // Write out and reset - dst.push(literal); - dst.push(&src[a..b]); // maybe b - 1 + // Write out and reset literal runs + dst.push(literal - 1); + // Indexing here is probably wrong + // Write from start of literal run to previous item + dst.push(&src[(ii - literal)..(ii))]); literal = 0; // *switch to replicate run* } - // If switching lit -> repl this is `current` item + // If switching to replicate run this is `current` item replicate += 1; } else { - // If current != previous - // if replicate run, write out data and reset - // else increment literal run + // If in a replicate run then write and reset to literal if replicate > 0 { - // write out and reset - dst.push(replicate); // Something like that + // Write out and reset + dst.push(257 - replicate); dst.push(previous); replicate = 0; // *switch to literal run* } - // If switching repl -> lit this is `current` item - literal = 1; + // If switching to literal run this is `current` item + literal += 1; } // OK, to reach here run length must be > 0 (obviously) - - // if currently in literal run... - // if the current value is still different to previous: - // continue literal run - // else - // if literal > 2?: - // write out literal - // reset run - // else: - // switch to replicate - - // If the run length is maxed out, write out and reset - // Hmm, is it faster to combine the length check? Probably about the same - if replicate == MAX_RUN { - // FIXME - dst.push(MAX_RUN); - dst.push(current); - literal = 0; + // If the run length is maxed, write out and reset + if replicate == MAX_RUN { // Should be more frequent + // Write out replicate run and reset + dst.push(129); + dst.push(previous); // previous or current? replicate = 0; - continue; - } else if replicate == MAX_RUN { - // FIXME - dst.push(MAX_RUN); // Er..? Convert? + } else if literal == MAX_RUN { + // Write out literal run and reset + dst.push(127); dst.extend(&src[a..b]); // something like that literal = 0; - replicate = 0; - continue; - } + } // 128 is noop! // At this point 0 < run length < MAX_RUN, so loop // -------------------------------------- + + ii += 1; // fixme? if ii = MAX_SRC { break; } + + previous = current; } // No more source data, finish up the last write operation if replicate > 0 { - // Write out and exit + // Write out and return + dst.push(257 - replicate); + dst.push(previous); } else if literal > 0 { - // Write out and exit + // Write out and return + dst.push(literal - 1); + dst.push(&src[(MAX_SRC - literal)..MAX_SRC)]); } - // I think that's sufficient, now to fix it up... Ok(()) } From 0e2c94e52e6f3d6687ca528eacabf64f2d486849 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sat, 24 Apr 2021 10:48:38 +1000 Subject: [PATCH 06/19] Fix syntax, etc --- src/lib.rs | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3235e1a..2ccba13 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -439,7 +439,7 @@ fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { // ceil(src.len() / 128) + src.len() // Close enough... - let mut dst: Vec = Vec::with_capacity(raw.len() + raw.len() / 128 + 1); + let mut dst: Vec = Vec::with_capacity(src.len() + src.len() / 128 + 1); match _decode_segment(src, &mut dst) { Ok(()) => return Ok(PyBytes::new(py, &dst[..])), Err(err) => return Err(PyValueError::new_err(err.to_string())), @@ -447,7 +447,8 @@ fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { } -fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { +#[allow(overflowing_literals)] +fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { /* RLE encode `src` into `dst` Parameters @@ -482,18 +483,20 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // a, b, c, ...: the literal stream // Maximum length of a literal/replicate run - let MAX_RUN: u8 = 128; + let MAX_RUN: usize = 128; + let MAX_SRC = src.len(); // Track how long the current literal run is // TODO: Check if math is more efficient for i8 - let mut literal: usize = 0; + let mut literal: u8 = 0; // Track how long the current replicate run is - let mut replicate: usize = 0; + let mut replicate: u8 = 0; let mut ii: usize = 0; - let mut previous: u8 = *src[ii]; + let mut previous: u8 = src[ii]; + let mut current: u8; loop { - current = *src[ii + 1]; + current = src[ii + 1]; println!( "ii: {}, prev: {}, cur: {}, l: {}, r: {}", @@ -506,10 +509,12 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // If in a literal run then write and reset to replicate if literal > 0 { // Should be a run of at least 2? // Write out and reset literal runs - dst.push(literal - 1); + dst.push(literal - 1u8); // Indexing here is probably wrong // Write from start of literal run to previous item - dst.push(&src[(ii - literal)..(ii))]); + for idx in (ii - usize::from(literal))..ii { + dst.push(src[idx]); + } literal = 0; // *switch to replicate run* } @@ -519,7 +524,7 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // If in a replicate run then write and reset to literal if replicate > 0 { // Write out and reset - dst.push(257 - replicate); + dst.push(u8::try_from(257 - replicate)?); dst.push(previous); replicate = 0; // *switch to literal run* @@ -531,15 +536,17 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // OK, to reach here run length must be > 0 (obviously) // If the run length is maxed, write out and reset - if replicate == MAX_RUN { // Should be more frequent + if usize::from(replicate) == MAX_RUN { // Should be more frequent // Write out replicate run and reset dst.push(129); dst.push(previous); // previous or current? replicate = 0; - } else if literal == MAX_RUN { + } else if usize::from(literal) == MAX_RUN { // Write out literal run and reset dst.push(127); - dst.extend(&src[a..b]); // something like that + for idx in (ii - usize::from(literal))..ii { // previous or current? + dst.push(src[idx]); + } literal = 0; } // 128 is noop! @@ -547,7 +554,7 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // -------------------------------------- ii += 1; // fixme? - if ii = MAX_SRC { break; } + if ii == MAX_SRC { break; } previous = current; } @@ -555,12 +562,14 @@ fn _encode_row(src: &[u8], dst: Vec) -> Result<(), Box> { // No more source data, finish up the last write operation if replicate > 0 { // Write out and return - dst.push(257 - replicate); + dst.push(u8::try_from(257 - replicate)?); dst.push(previous); } else if literal > 0 { // Write out and return - dst.push(literal - 1); - dst.push(&src[(MAX_SRC - literal)..MAX_SRC)]); + dst.push(literal - 1u8); + for idx in (MAX_SRC - usize::from(literal))..MAX_SRC { + dst.push(src[idx]); + } } Ok(()) From e365369349a30a7a2aea89d15db1f6ac63af51aa Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sat, 24 Apr 2021 11:09:30 +1000 Subject: [PATCH 07/19] First passing test --- rle/_version.py | 2 +- rle/tests/test_encode.py | 2 +- src/lib.rs | 30 ++++++++++++++++++++---------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/rle/_version.py b/rle/_version.py index 266adfa..a38ac7f 100644 --- a/rle/_version.py +++ b/rle/_version.py @@ -3,7 +3,7 @@ import re -__version__ = '1.0.0' +__version__ = '1.1.0.dev0' VERSION_PATTERN = r""" diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py index c49dfc5..c8cc95a 100644 --- a/rle/tests/test_encode.py +++ b/rle/tests/test_encode.py @@ -63,7 +63,7 @@ class TestEncodeRow: @pytest.mark.parametrize('row, ref', REFERENCE_ENCODE_ROW) def test_encode(self, row, ref): """Test encoding an empty row.""" - row = np.asarray(row) + row = np.asarray(row, dtype='uint8') assert ref == encode_row(row.tobytes()) diff --git a/src/lib.rs b/src/lib.rs index 2ccba13..236a9aa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -440,7 +440,7 @@ fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { // Close enough... let mut dst: Vec = Vec::with_capacity(src.len() + src.len() / 128 + 1); - match _decode_segment(src, &mut dst) { + match _encode_row(src, &mut dst) { Ok(()) => return Ok(PyBytes::new(py, &dst[..])), Err(err) => return Err(PyValueError::new_err(err.to_string())), } @@ -495,6 +495,8 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { let mut previous: u8 = src[ii]; let mut current: u8; + println!("Max src {}", MAX_SRC); + loop { current = src[ii + 1]; @@ -522,26 +524,25 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { replicate += 1; } else { // If in a replicate run then write and reset to literal - if replicate > 0 { + if replicate > 1 { // Write out and reset dst.push(u8::try_from(257 - replicate)?); dst.push(previous); replicate = 0; // *switch to literal run* - } + } // else do what? + // If switching to literal run this is `current` item literal += 1; } - // OK, to reach here run length must be > 0 (obviously) - // If the run length is maxed, write out and reset - if usize::from(replicate) == MAX_RUN { // Should be more frequent + if usize::from(replicate) + 1 == MAX_RUN { // Should be more frequent // Write out replicate run and reset dst.push(129); - dst.push(previous); // previous or current? + dst.push(previous); replicate = 0; - } else if usize::from(literal) == MAX_RUN { + } else if usize::from(literal) + 1 == MAX_RUN { // Write out literal run and reset dst.push(127); for idx in (ii - usize::from(literal))..ii { // previous or current? @@ -554,18 +555,27 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // -------------------------------------- ii += 1; // fixme? - if ii == MAX_SRC { break; } + if ii == MAX_SRC - 1 { break; } previous = current; } + println!( + "Post-loop ii: {}, prev: {}, cur: {}, l: {}, r: {}", + ii, previous, current, literal, replicate + ); + println!("dst {:?}", dst); + // No more source data, finish up the last write operation - if replicate > 0 { + if replicate > 1 { // Write out and return + // `replicate` must be at least 2 or we overflow dst.push(u8::try_from(257 - replicate)?); dst.push(previous); + // else if replicate == 1 do what? } else if literal > 0 { // Write out and return + // `literal` must be at least 1 or we undeflow dst.push(literal - 1u8); for idx in (MAX_SRC - usize::from(literal))..MAX_SRC { dst.push(src[idx]); From 4d3bee20c1bb48ef7ebb307465141fe0c4190844 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sat, 24 Apr 2021 13:06:26 +1000 Subject: [PATCH 08/19] WIP --- rle/tests/test_encode.py | 32 ++++++++++++---- src/lib.rs | 83 +++++++++++++++++++++++++++------------- 2 files changed, 80 insertions(+), 35 deletions(-) diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py index c8cc95a..36f7370 100644 --- a/rle/tests/test_encode.py +++ b/rle/tests/test_encode.py @@ -9,9 +9,9 @@ # Tests for RLE encoding REFERENCE_ENCODE_ROW = [ # Input, output - pytest.param([], b'', id='1'), + pytest.param([], b'', id='0'), # Replicate run tests - # 2 (min) replicate + # 2 (min) replicate, could also be a 2 (min) literal pytest.param([0] * 2, b'\xff\x00', id='1'), pytest.param([0] * 3, b'\xfe\x00', id='2'), pytest.param([0] * 64, b'\xc1\x00', id='3'), @@ -31,11 +31,15 @@ pytest.param([0, 1, 2], b'\x02\x00\x01\x02', id='11'), pytest.param([0, 1] * 32, b'\x3f' + b'\x00\x01' * 32, id='12'), # 127 literal - pytest.param([0, 1] * 63 + [2], b'\x7e' + b'\x00\x01' * 63 + b'\x02', id='13'), + pytest.param( + [0, 1] * 63 + [2], b'\x7e' + b'\x00\x01' * 63 + b'\x02', id='13' + ), # 128 literal (max) pytest.param([0, 1] * 64, b'\x7f' + b'\x00\x01' * 64, id='14'), # 128 (max) literal, 1 (min) literal - pytest.param([0, 1] * 64 + [2], b'\x7f' + b'\x00\x01' * 64 + b'\x00\x02', id='15'), + pytest.param( + [0, 1] * 64 + [2], b'\x7f' + b'\x00\x01' * 64 + b'\x00\x02', id='15' + ), # 128 (max) x 5 literals pytest.param([0, 1] * 64 * 5, (b'\x7f' + b'\x00\x01' * 64) * 5, id='16'), # Combination run tests @@ -44,17 +48,29 @@ # 1 (min) literal, 128 (max) replicate pytest.param([0] + [1] * 128, b'\x00\x00\x81\x01', id='18'), # 128 (max) literal, 2 (min) replicate - pytest.param([0, 1] * 64 + [2] * 2, b'\x7f' + b'\x00\x01' * 64 + b'\xff\x02', id='19'), + pytest.param( + [0, 1] * 64 + [2] * 2, + b'\x7f' + b'\x00\x01' * 64 + b'\xff\x02', + id='19' + ), # 128 (max) literal, 128 (max) replicate - pytest.param([0, 1] * 64 + [2] * 128, b'\x7f' + b'\x00\x01' * 64 + b'\x81\x02', id='20'), + pytest.param( + [0, 1] * 64 + [2] * 128, + b'\x7f' + b'\x00\x01' * 64 + b'\x81\x02', + id='20' + ), # 2 (min) replicate, 1 (min) literal pytest.param([0, 0, 1], b'\xff\x00\x00\x01', id='21'), # 2 (min) replicate, 128 (max) literal - pytest.param([0, 0] + [1, 2] * 64, b'\xff\x00\x7f' + b'\x01\x02' * 64, id='22'), + pytest.param( + [0, 0] + [1, 2] * 64, b'\xff\x00\x7f' + b'\x01\x02' * 64, id='22' + ), # 128 (max) replicate, 1 (min) literal pytest.param([0] * 128 + [1], b'\x81\x00\x00\x01', id='23'), # 128 (max) replicate, 128 (max) literal - pytest.param([0] * 128 + [1, 2] * 64, b'\x81\x00\x7f' + b'\x01\x02' * 64, id='24'), + pytest.param( + [0] * 128 + [1, 2] * 64, b'\x81\x00\x7f' + b'\x01\x02' * 64, id='24' + ), ] diff --git a/src/lib.rs b/src/lib.rs index 236a9aa..f87c810 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -439,7 +439,7 @@ fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { // ceil(src.len() / 128) + src.len() // Close enough... - let mut dst: Vec = Vec::with_capacity(src.len() + src.len() / 128 + 1); + let mut dst = Vec::with_capacity(src.len() + src.len() / 128 + 1); match _encode_row(src, &mut dst) { Ok(()) => return Ok(PyBytes::new(py, &dst[..])), Err(err) => return Err(PyValueError::new_err(err.to_string())), @@ -458,20 +458,17 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { dst The destination for the encoded data. */ - let err_short_src = Err( - String::from( - "RLE encoding requires a segment row length of at least 2 bytes" - ).into(), - ); - - // Check we have the minimum required amount of data - if src.len() < 2 { return err_short_src } + //let err_short_src = Err( + // String::from( + // "RLE encoding requires a segment row length of at least 2 bytes" + // ).into(), + //); // Reminders: // * Each image row is encoded separately // * Literal runs are a non-repetitive stream // * Replicate runs are a repetitive stream - // * 3-byte repeats shall be encoded as replicate runs + // * 2 byte repeats are encoded as replicate runs // * Maximum length of literal/replicate runs is 128 bytes // Replicate run: dst += [count, value] @@ -482,14 +479,17 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // count: number of bytes in the literal stream (i8 = literal - 1) // a, b, c, ...: the literal stream + // No data to be encoded + if src.len() == 0 { return Ok(()) } + // Maximum length of a literal/replicate run - let MAX_RUN: usize = 128; + let MAX_RUN: u8 = 128; let MAX_SRC = src.len(); // Track how long the current literal run is // TODO: Check if math is more efficient for i8 - let mut literal: u8 = 0; + let mut literal: u8 = 0; // Should get no larger than 128 // Track how long the current replicate run is - let mut replicate: u8 = 0; + let mut replicate: u8 = 0; // Should get no larger than 128 let mut ii: usize = 0; let mut previous: u8 = src[ii]; @@ -498,10 +498,11 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { println!("Max src {}", MAX_SRC); loop { - current = src[ii + 1]; + ii += 1; // index of current src byte cuurent + current = src[ii]; println!( - "ii: {}, prev: {}, cur: {}, l: {}, r: {}", + "Start of loop - ii: {}, prev: {}, cur: {}, l: {}, r: {}", ii, previous, current, literal, replicate ); @@ -509,7 +510,7 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // -------------------------- if current == previous { // If in a literal run then write and reset to replicate - if literal > 0 { // Should be a run of at least 2? + if literal > 1 { // Should be a run of at least 2? // Write out and reset literal runs dst.push(literal - 1u8); // Indexing here is probably wrong @@ -526,7 +527,7 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // If in a replicate run then write and reset to literal if replicate > 1 { // Write out and reset - dst.push(u8::try_from(257 - replicate)?); + dst.push(257u8.wrapping_sub(replicate)); dst.push(previous); replicate = 0; // *switch to literal run* @@ -537,15 +538,19 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { } // If the run length is maxed, write out and reset - if usize::from(replicate) + 1 == MAX_RUN { // Should be more frequent + if replicate == MAX_RUN - 1 { // Should be more frequent + println!("Max replicate run reached"); // Write out replicate run and reset dst.push(129); - dst.push(previous); + dst.push(current); replicate = 0; - } else if usize::from(literal) + 1 == MAX_RUN { + + // 129 byte literal run -> hmm + } else if literal == MAX_RUN - 1 { + println!("Max literal run reached"); // Write out literal run and reset dst.push(127); - for idx in (ii - usize::from(literal))..ii { // previous or current? + for idx in (ii - usize::from(literal))..ii { // push to current dst.push(src[idx]); } literal = 0; @@ -554,34 +559,58 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // At this point 0 < run length < MAX_RUN, so loop // -------------------------------------- - ii += 1; // fixme? - if ii == MAX_SRC - 1 { break; } - previous = current; + + //ii += 1; // fixme? + + println!( + " End of loop - ii: {}, prev: {}, cur: {}, l: {}, r: {}", + ii, previous, current, literal, replicate + ); + + if ii == MAX_SRC - 1 { break; } } println!( - "Post-loop ii: {}, prev: {}, cur: {}, l: {}, r: {}", + " Post-loop ii: {}, prev: {}, cur: {}, l: {}, r: {}", ii, previous, current, literal, replicate ); - println!("dst {:?}", dst); + println!(" dst {:?}", dst); // No more source data, finish up the last write operation + + // Handle cases where the last byte is part of a replicate run + // such as when 129 0x00 bytes + if replicate == 1 { + dst.push(255); + dst.push(current); + replicate = 0; + } + if replicate > 1 { // Write out and return // `replicate` must be at least 2 or we overflow - dst.push(u8::try_from(257 - replicate)?); + // Eh, the math is out somewhere... + // 129 to 255 -> replicate run + dst.push(256u8.wrapping_sub(replicate)); dst.push(previous); // else if replicate == 1 do what? } else if literal > 0 { // Write out and return // `literal` must be at least 1 or we undeflow + // 0 to 127 -> literal run dst.push(literal - 1u8); for idx in (MAX_SRC - usize::from(literal))..MAX_SRC { dst.push(src[idx]); } } + println!( + " Pre-return ii: {}, prev: {}, cur: {}, l: {}, r: {}", + ii, previous, current, literal, replicate + ); + println!(" dst {:?}", dst); + Ok(()) } From d1659ad695b78702336bddf779dfbbcff19b5cfa Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sat, 24 Apr 2021 14:34:50 +1000 Subject: [PATCH 09/19] Pure literals working --- rle/tests/test_encode.py | 2 +- src/lib.rs | 130 +++++++++++++++++++++------------------ 2 files changed, 72 insertions(+), 60 deletions(-) diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py index 36f7370..88a8711 100644 --- a/rle/tests/test_encode.py +++ b/rle/tests/test_encode.py @@ -11,7 +11,7 @@ # Input, output pytest.param([], b'', id='0'), # Replicate run tests - # 2 (min) replicate, could also be a 2 (min) literal + # 2 (min) replicate, could also be a 2 (min) literal 0x00 0x00 pytest.param([0] * 2, b'\xff\x00', id='1'), pytest.param([0] * 3, b'\xfe\x00', id='2'), pytest.param([0] * 64, b'\xc1\x00', id='3'), diff --git a/src/lib.rs b/src/lib.rs index f87c810..b235471 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -491,109 +491,122 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // Track how long the current replicate run is let mut replicate: u8 = 0; // Should get no larger than 128 - let mut ii: usize = 0; - let mut previous: u8 = src[ii]; - let mut current: u8; + let mut previous: u8 = src[0]; + let mut current: u8 = src[1]; + let mut ii: usize = 1; - println!("Max src {}", MAX_SRC); + println!( + "Preloop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", + ii, MAX_SRC, previous, current, literal, replicate + ); + + // Replicate and literal count are the length of the current run + // Max is 128 + // Account for the first item + if current == previous { replicate = 1; } + else { literal = 1; } loop { - ii += 1; // index of current src byte cuurent current = src[ii]; println!( - "Start of loop - ii: {}, prev: {}, cur: {}, l: {}, r: {}", - ii, previous, current, literal, replicate + "Start of loop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", + ii, MAX_SRC, previous, current, literal, replicate ); // Run type switching/control // -------------------------- if current == previous { // If in a literal run then write and reset to replicate - if literal > 1 { // Should be a run of at least 2? - // Write out and reset literal runs - dst.push(literal - 1u8); - // Indexing here is probably wrong - // Write from start of literal run to previous item - for idx in (ii - usize::from(literal))..ii { - dst.push(src[idx]); - } - literal = 0; - // *switch to replicate run* - } - // If switching to replicate run this is `current` item + // if literal > 1 { // Should be a run of at least 2? + // // Write out and reset literal runs + // dst.push(literal - 1u8); + // // Indexing here is probably wrong + // // Write from start of literal run to previous item + // for idx in (ii - usize::from(literal))..ii { + // dst.push(src[idx]); + // } + // literal = 0; + // // *switch to replicate run* + // } + // If switching to replicate run this is `previous` item replicate += 1; - } else { - // If in a replicate run then write and reset to literal - if replicate > 1 { - // Write out and reset - dst.push(257u8.wrapping_sub(replicate)); - dst.push(previous); - replicate = 0; - // *switch to literal run* - } // else do what? - - // If switching to literal run this is `current` item - literal += 1; } + // } else { + // // If in a replicate run then write and reset to literal + // if replicate > 1 { + // // Write out and reset + // dst.push(257u8.wrapping_sub(replicate)); + // dst.push(previous); + // replicate = 0; + // // *switch to literal run* + // } // else do what? + // + // // If switching to literal run this is `previous` item + // literal += 1; + // } // If the run length is maxed, write out and reset - if replicate == MAX_RUN - 1 { // Should be more frequent - println!("Max replicate run reached"); + if replicate == MAX_RUN { // Should be more frequent + // 128 byte run reached + println!(" Max replicate run reached"); // Write out replicate run and reset dst.push(129); - dst.push(current); + dst.push(previous); replicate = 0; + } // 129 byte literal run -> hmm - } else if literal == MAX_RUN - 1 { - println!("Max literal run reached"); - // Write out literal run and reset - dst.push(127); - for idx in (ii - usize::from(literal))..ii { // push to current - dst.push(src[idx]); - } - literal = 0; - } // 128 is noop! + // } else if literal == MAX_RUN - 1 { + // println!("Max literal run reached"); + // // Write out literal run and reset + // dst.push(127); + // for idx in (ii - usize::from(literal))..ii { // push to previous + // dst.push(src[idx]); + // } + // literal = 0; + // } // 128 is noop! // At this point 0 < run length < MAX_RUN, so loop // -------------------------------------- previous = current; - //ii += 1; // fixme? println!( - " End of loop - ii: {}, prev: {}, cur: {}, l: {}, r: {}", - ii, previous, current, literal, replicate + " End of loop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", + ii, MAX_SRC, previous, current, literal, replicate ); - if ii == MAX_SRC - 1 { break; } + ii += 1; + + // Break if `current` is the last byte of the src + if ii == MAX_SRC { break; } } println!( - " Post-loop ii: {}, prev: {}, cur: {}, l: {}, r: {}", - ii, previous, current, literal, replicate + " Post-loop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", + ii, MAX_SRC, previous, current, literal, replicate ); - println!(" dst {:?}", dst); // No more source data, finish up the last write operation // Handle cases where the last byte is part of a replicate run - // such as when 129 0x00 bytes + // such as when 129 0x00 bytes -> replicate(128) + literal(1) + // Handle cases where replicate run is 0x00 0x00 -> replicate(2) if replicate == 1 { - dst.push(255); - dst.push(current); replicate = 0; + literal = 1; } if replicate > 1 { // Write out and return - // `replicate` must be at least 2 or we overflow + // `replicate` must be at least 1 or we overflow // Eh, the math is out somewhere... + // 1 -> copy 2 // 129 to 255 -> replicate run - dst.push(256u8.wrapping_sub(replicate)); - dst.push(previous); + dst.push(257u8.wrapping_sub(replicate)); + dst.push(current); // else if replicate == 1 do what? } else if literal > 0 { // Write out and return @@ -606,10 +619,9 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { } println!( - " Pre-return ii: {}, prev: {}, cur: {}, l: {}, r: {}", - ii, previous, current, literal, replicate + " Return - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", + ii, MAX_SRC, previous, current, literal, replicate ); - println!(" dst {:?}", dst); Ok(()) } From aa98eccb6ec89d348b5b7cf4aa221e40d9327153 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sat, 24 Apr 2021 14:47:49 +1000 Subject: [PATCH 10/19] Pure both working --- rle/tests/test_encode.py | 16 +++++++++--- src/lib.rs | 56 +++++++++++++++++++++------------------- 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py index 88a8711..68a9f03 100644 --- a/rle/tests/test_encode.py +++ b/rle/tests/test_encode.py @@ -35,13 +35,23 @@ [0, 1] * 63 + [2], b'\x7e' + b'\x00\x01' * 63 + b'\x02', id='13' ), # 128 literal (max) - pytest.param([0, 1] * 64, b'\x7f' + b'\x00\x01' * 64, id='14'), + pytest.param( + [0, 1] * 64, + b'\x7f' + b'\x00\x01' * 64, + id='14'), # 128 (max) literal, 1 (min) literal pytest.param( - [0, 1] * 64 + [2], b'\x7f' + b'\x00\x01' * 64 + b'\x00\x02', id='15' + [0, 1] * 64 + [2], + b'\x7f' + b'\x00\x01' * 64 + b'\x00\x02', + id='15' ), # 128 (max) x 5 literals - pytest.param([0, 1] * 64 * 5, (b'\x7f' + b'\x00\x01' * 64) * 5, id='16'), + pytest.param( + [0, 1] * 64 * 5, + (b'\x7f' + b'\x00\x01' * 64) * 5, + id='16' + ), + # Combination run tests # 1 (min) literal, 1 (min) replicate pytest.param([0, 1, 1], b'\x00\x00\xff\x01', id='17'), diff --git a/src/lib.rs b/src/lib.rs index b235471..1bc560f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -480,7 +480,15 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // a, b, c, ...: the literal stream // No data to be encoded - if src.len() == 0 { return Ok(()) } + match src.len() { + 0 => { return Ok(()) }, + 1 => { + dst.push(0); + dst.push(0); + return Ok(()) + }, + _ => {} + } // Maximum length of a literal/replicate run let MAX_RUN: u8 = 128; @@ -531,20 +539,19 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // } // If switching to replicate run this is `previous` item replicate += 1; + } else { + // If in a replicate run then write and reset to literal + // if replicate > 1 { + // // Write out and reset + // dst.push(257u8.wrapping_sub(replicate)); + // dst.push(previous); + // replicate = 0; + // // *switch to literal run* + // } // else do what? + + // If switching to literal run this is `previous` item + literal += 1; } - // } else { - // // If in a replicate run then write and reset to literal - // if replicate > 1 { - // // Write out and reset - // dst.push(257u8.wrapping_sub(replicate)); - // dst.push(previous); - // replicate = 0; - // // *switch to literal run* - // } // else do what? - // - // // If switching to literal run this is `previous` item - // literal += 1; - // } // If the run length is maxed, write out and reset if replicate == MAX_RUN { // Should be more frequent @@ -554,18 +561,15 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { dst.push(129); dst.push(previous); replicate = 0; - } - - // 129 byte literal run -> hmm - // } else if literal == MAX_RUN - 1 { - // println!("Max literal run reached"); - // // Write out literal run and reset - // dst.push(127); - // for idx in (ii - usize::from(literal))..ii { // push to previous - // dst.push(src[idx]); - // } - // literal = 0; - // } // 128 is noop! + } else if literal == MAX_RUN { + println!("Max literal run reached"); + // Write out literal run and reset + dst.push(127); + for idx in ii + 1 - usize::from(literal)..ii + 1 { // push to previous + dst.push(src[idx]); + } + literal = 0; + } // 128 is noop! // At this point 0 < run length < MAX_RUN, so loop // -------------------------------------- From ba42e50c2878b9b1404b8f98961b31b703021482 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sat, 24 Apr 2021 15:22:17 +1000 Subject: [PATCH 11/19] Tests passing --- rle/tests/test_encode.py | 29 +++++++++++------------ src/lib.rs | 50 +++++++++++++++++++--------------------- 2 files changed, 37 insertions(+), 42 deletions(-) diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py index 68a9f03..de28a27 100644 --- a/rle/tests/test_encode.py +++ b/rle/tests/test_encode.py @@ -35,29 +35,26 @@ [0, 1] * 63 + [2], b'\x7e' + b'\x00\x01' * 63 + b'\x02', id='13' ), # 128 literal (max) - pytest.param( - [0, 1] * 64, - b'\x7f' + b'\x00\x01' * 64, - id='14'), + pytest.param([0, 1] * 64, b'\x7f' + b'\x00\x01' * 64, id='14'), # 128 (max) literal, 1 (min) literal pytest.param( - [0, 1] * 64 + [2], - b'\x7f' + b'\x00\x01' * 64 + b'\x00\x02', - id='15' + [0, 1] * 64 + [2], b'\x7f' + b'\x00\x01' * 64 + b'\x00\x02', id='15' ), # 128 (max) x 5 literals - pytest.param( - [0, 1] * 64 * 5, - (b'\x7f' + b'\x00\x01' * 64) * 5, - id='16' - ), + pytest.param([0, 1] * 64 * 5, (b'\x7f' + b'\x00\x01' * 64) * 5, id='16'), # Combination run tests - # 1 (min) literal, 1 (min) replicate - pytest.param([0, 1, 1], b'\x00\x00\xff\x01', id='17'), - # 1 (min) literal, 128 (max) replicate - pytest.param([0] + [1] * 128, b'\x00\x00\x81\x01', id='18'), + # 2 literal, 1(min) literal + # or 1 (min) literal, 1 (min) replicate b'\x00\x00\xff\x01' + pytest.param([0, 1, 1], b'\x01\x00\x01\x00\x01', id='17'), + # 2 literal, 127 replicate + # or 1 (min) literal, 128 (max) replicate + pytest.param([0] + [1] * 128, b'\x01\x00\x01\x82\x01', id='18'), + # 2 literal, 128 (max) replicate + # or 1 (min) literal, 128 (max) replicate, 1 (min) literal + pytest.param([0] + [1] * 129, b'\x01\x00\x01\x81\x01', id='18b'), # 128 (max) literal, 2 (min) replicate + # 128 (max literal) pytest.param( [0, 1] * 64 + [2] * 2, b'\x7f' + b'\x00\x01' * 64 + b'\xff\x02', diff --git a/src/lib.rs b/src/lib.rs index 1bc560f..4788166 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -503,17 +503,17 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { let mut current: u8 = src[1]; let mut ii: usize = 1; - println!( - "Preloop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", - ii, MAX_SRC, previous, current, literal, replicate - ); - // Replicate and literal count are the length of the current run // Max is 128 // Account for the first item if current == previous { replicate = 1; } else { literal = 1; } + println!( + "Preloop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", + ii, MAX_SRC, previous, current, literal, replicate + ); + loop { current = src[ii]; @@ -525,29 +525,27 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // Run type switching/control // -------------------------- if current == previous { - // If in a literal run then write and reset to replicate - // if literal > 1 { // Should be a run of at least 2? - // // Write out and reset literal runs - // dst.push(literal - 1u8); - // // Indexing here is probably wrong - // // Write from start of literal run to previous item - // for idx in (ii - usize::from(literal))..ii { - // dst.push(src[idx]); - // } - // literal = 0; - // // *switch to replicate run* - // } + if literal == 1 { + literal = 0; + replicate = 1; + } else if literal > 1 { + dst.push(literal - 1u8); + for idx in ii - usize::from(literal)..ii { + dst.push(src[idx]); + } + literal = 0; + } // If switching to replicate run this is `previous` item replicate += 1; } else { - // If in a replicate run then write and reset to literal - // if replicate > 1 { - // // Write out and reset - // dst.push(257u8.wrapping_sub(replicate)); - // dst.push(previous); - // replicate = 0; - // // *switch to literal run* - // } // else do what? + if replicate == 1 { + literal = 1; + replicate = 0; + } else if replicate > 1 { + dst.push(257u8.wrapping_sub(replicate)); + dst.push(previous); + replicate = 0; + } // If switching to literal run this is `previous` item literal += 1; @@ -565,7 +563,7 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { println!("Max literal run reached"); // Write out literal run and reset dst.push(127); - for idx in ii + 1 - usize::from(literal)..ii + 1 { // push to previous + for idx in ii + 1 - usize::from(literal)..ii + 1 { dst.push(src[idx]); } literal = 0; From c703f0d75d647ba695f8f47e1c7f0266336e8613 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sun, 25 Apr 2021 13:56:15 +1000 Subject: [PATCH 12/19] Frame encoding working --- Cargo.lock | 117 ++++++++++++ Cargo.toml | 1 + rle/tests/test_decode.py | 2 +- rle/tests/test_encode.py | 144 +++++++++++++-- src/lib.rs | 384 +++++++++++++++++++++++++++++++-------- 5 files changed, 556 insertions(+), 92 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6f8e556..97cbfe2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,8 +5,15 @@ name = "_rle" version = "1.0.0" dependencies = [ "pyo3", + "rayon", ] +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + [[package]] name = "bitflags" version = "1.2.1" @@ -19,6 +26,51 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "crossbeam-channel" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2584f639eb95fea8c798496315b297cf81b9b58b6d30ab066a75455333cf4b12" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7e9d99fa91428effe99c5c6d4634cdeba32b8cf784fc428a2a687f61a952c49" +dependencies = [ + "autocfg", + "cfg-if", + "lazy_static", +] + [[package]] name = "ctor" version = "0.1.20" @@ -29,6 +81,12 @@ dependencies = [ "syn", ] +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + [[package]] name = "ghost" version = "0.1.2" @@ -40,6 +98,15 @@ dependencies = [ "syn", ] +[[package]] +name = "hermit-abi" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" +dependencies = [ + "libc", +] + [[package]] name = "indoc" version = "0.3.6" @@ -94,6 +161,12 @@ dependencies = [ "syn", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = "0.2.93" @@ -109,6 +182,25 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "memoffset" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83fb6581e8ed1f85fd45c116db8405483899489e38406156c25eb743554361d" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "parking_lot" version = "0.11.1" @@ -216,6 +308,31 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + [[package]] name = "redox_syscall" version = "0.2.6" diff --git a/Cargo.toml b/Cargo.toml index 032c909..8612fc4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,4 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.13.1", features = ["extension-module"] } +rayon = { version = "1.5.0" } diff --git a/rle/tests/test_decode.py b/rle/tests/test_decode.py index 7b88e83..0712409 100644 --- a/rle/tests/test_decode.py +++ b/rle/tests/test_decode.py @@ -136,7 +136,7 @@ def test_non_increasing_offsets_raises(self): def test_invalid_samples_px_raises(self): """Test exception if samples per px not 1 or 3.""" - msg = r"The \(0028,0002\) 'Samples Per Pixel' must be 1 or 3" + msg = r"The \(0028,0002\) 'Samples per Pixel' must be 1 or 3" d = self.as_bytes([64, 70]) with pytest.raises(ValueError, match=msg): decode_frame(d + b'\x00' * 8, 1, 8) diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py index de28a27..35a2710 100644 --- a/rle/tests/test_encode.py +++ b/rle/tests/test_encode.py @@ -2,8 +2,22 @@ import numpy as np import pytest +import sys -from rle._rle import encode_row +from pydicom import dcmread +from pydicom.data import get_testdata_file +from pydicom.encaps import defragment_data +from pydicom.pixel_data_handlers.util import ( + pixel_dtype, reshape_pixel_array +) + +from rle.data import get_indexed_datasets +from rle._rle import ( + encode_row, encode_segment, decode_segment, encode_frame, decode_frame +) + + +INDEX = get_indexed_datasets('1.2.840.10008.1.2.5') # Tests for RLE encoding @@ -13,6 +27,7 @@ # Replicate run tests # 2 (min) replicate, could also be a 2 (min) literal 0x00 0x00 pytest.param([0] * 2, b'\xff\x00', id='1'), + pytest.param([255] * 2, b'\xff\xFF', id='1b'), pytest.param([0] * 3, b'\xfe\x00', id='2'), pytest.param([0] * 64, b'\xc1\x00', id='3'), pytest.param([0] * 127, b'\x82\x00', id='4'), @@ -27,6 +42,7 @@ # Literal run tests # 1 (min) literal pytest.param([0], b'\x00\x00', id='9'), + pytest.param([255], b'\x00\xff', id='9b'), pytest.param([0, 1], b'\x01\x00\x01', id='10'), pytest.param([0, 1, 2], b'\x02\x00\x01\x02', id='11'), pytest.param([0, 1] * 32, b'\x3f' + b'\x00\x01' * 32, id='12'), @@ -90,39 +106,145 @@ def test_encode(self, row, ref): assert ref == encode_row(row.tobytes()) -@pytest.mark.skip() +def test_encode_segment(): + data = bytes([0] * 128 + [1, 2] * 64) * 20 + #data = np.asarray(data, dtype='uint8').tobytes() + result = encode_segment(data, 512) + print(len(data)) + print(data[:50]) + print(result[:50]) + out = decode_segment(result) + assert data == out + + +def test_encode_frame(): + # u16, little endian, 3 spp + #ds = dcmread(get_testdata_file("SC_rgb_16bit.dcm")) + # u16, big endian, 3 spp, + ds = INDEX["SC_rgb_rle_16bit.dcm"]['ds'] + #b = defragment_data(ds.PixelData) + #print(' '.join([f"{ii}" for ii in b[:64]])) + arr_be_a = ds.pixel_array + arr_be_a[0, 0, 0] = 10 + byteorder = arr_be_a.dtype.byteorder + + # TODO: Add to utils function + # maybe check whether C or F ordering too? + if byteorder == '=': + byteorder = '<' if sys.byteorder == "little" else '>' + + print(arr_be_a.tobytes()[:120]) + + frame_be = encode_frame( + arr_be_a.tobytes(), + ds.Rows, + ds.Columns, + ds.SamplesPerPixel, + ds.BitsAllocated, + byteorder + ) + + out_be = decode_frame(frame_be, ds.Rows * ds.Columns, ds.BitsAllocated) + dtype = pixel_dtype(ds).newbyteorder('>') + arr_be = np.frombuffer(out_be, dtype) + arr_be = reshape_pixel_array(ds, arr_be) + + + ds = dcmread(get_testdata_file("SC_rgb_16bit.dcm")) + arr_le_a = ds.pixel_array + arr_le_a[0, 0, 0] = 10 + + byteorder = arr_le_a.dtype.byteorder + if byteorder == '=': + byteorder = '<' if sys.byteorder == "little" else '>' + + print(arr_le_a.tobytes()[:120]) + + frame_le = encode_frame( + arr_le_a.tobytes(), + ds.Rows, + ds.Columns, + ds.SamplesPerPixel, + ds.BitsAllocated, + byteorder + ) + + out_le = decode_frame(frame_le, ds.Rows * ds.Columns, ds.BitsAllocated) + dtype = pixel_dtype(ds).newbyteorder('>') + arr_le = np.frombuffer(out_le, dtype) + + if ds.SamplesPerPixel == 1: + arr_le = arr_le.reshape(ds.Rows, ds.Columns) + else: + # RLE is planar configuration 1 + arr_le = np.reshape(arr_le, (ds.SamplesPerPixel, ds.Rows, ds.Columns)) + arr_le = arr_le.transpose(1, 2, 0) + + + import matplotlib.pyplot as plt + fig, (ax1, ax2) = plt.subplots(1, 2) + ax1.imshow(arr_be) + ax2.imshow(arr_le) + plt.show() + + assert out_le == out_be + + + +def test_numpy(): + #ds = INDEX["SC_rgb_rle_16bit.dcm"]['ds'] + ds = dcmread(get_testdata_file("SC_rgb_16bit.dcm")) + arr = ds.pixel_array + arr[0, 0, 0] = 10 + print(arr) + print(arr.tobytes()[:20]) + + # [Channel][Column index][byte order]-[Row index] + # spp 3, bpp 1, 100x100 + # R0-0, G0-0, B0-0, ..., B99-0, R0-1, G0-1, B0-1, ..., B99-1, ... + + # Big endian byte order + # spp 3, bpp 2, 100x100: a = MSB, b = LSB + # R0a-0, R0b-0, G0a-0, G0b-0, B0a-0, B0a-0, ..., B99a-0, B99b-0, R0a-1, ... + + # Little endian byte order + # spp 3, bpp 2, 100x100: a = MSB, b = LSB + # R0b-0, R0a-0, G0b-0, G0a-0, B0b-0, B0a-0, ..., B99b-0, B99a-0, R0b-1, ... + + + class TestEncodeSegment: - """Tests for rle_handler._rle_encode_segment.""" + """Tests for _rle.encode_segment.""" def test_one_row(self): """Test encoding data that contains only a single row.""" - ds = dcmread(RLE_8_1_1F) + ds = INDEX["OBXXXX1A_rle.dcm"]['ds'] pixel_data = defragment_data(ds.PixelData) - decoded = _rle_decode_segment(pixel_data[64:]) + decoded = decode_segment(pixel_data[64:]) assert ds.Rows * ds.Columns == len(decoded) arr = np.frombuffer(decoded, 'uint8').reshape(ds.Rows, ds.Columns) # Re-encode a single row of the decoded data row = arr[0] assert (ds.Columns,) == row.shape - encoded = _rle_encode_segment(row) + encoded = encode_segment(row.tobytes(), ds.Columns) # Decode the re-encoded data and check that it's the same - redecoded = _rle_decode_segment(encoded) + redecoded = decode_segment(encoded) assert ds.Columns == len(redecoded) assert decoded[:ds.Columns] == redecoded def test_cycle(self): """Test the decoded data remains the same after encoding/decoding.""" - ds = dcmread(RLE_8_1_1F) + ds = INDEX["OBXXXX1A_rle.dcm"]['ds'] pixel_data = defragment_data(ds.PixelData) - decoded = _rle_decode_segment(pixel_data[64:]) + decoded = decode_segment(pixel_data[64:]) assert ds.Rows * ds.Columns == len(decoded) arr = np.frombuffer(decoded, 'uint8').reshape(ds.Rows, ds.Columns) # Re-encode the decoded data - encoded = _rle_encode_segment(arr) + encoded = encode_segment(arr.tobytes(), ds.Columns) # Decode the re-encoded data and check that it's the same - redecoded = _rle_decode_segment(encoded) + redecoded = decode_segment(encoded) assert ds.Rows * ds.Columns == len(redecoded) assert decoded == redecoded diff --git a/src/lib.rs b/src/lib.rs index 4788166..a67409b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ -use std::error::Error; use std::convert::TryFrom; +use std::error::Error; use pyo3::prelude::*; use pyo3::wrap_pyfunction; @@ -8,6 +8,8 @@ use pyo3::types::{PyBytes, PyByteArray}; use pyo3::exceptions::{PyValueError}; +//use rayon::prelude::*; + // RLE Decoding #[pyfunction] @@ -130,7 +132,7 @@ fn _decode_frame( String::from("Frame is not long enough to contain RLE encoded data").into() ); let err_invalid_nr_samples = Err( - String::from("The (0028,0002) 'Samples Per Pixel' must be 1 or 3").into() + String::from("The (0028,0002) 'Samples per Pixel' must be 1 or 3").into() ); let err_segment_length = Err( String::from( @@ -186,8 +188,7 @@ fn _decode_frame( // Check the samples per pixel is conformant let samples_per_px: u8 = nr_segments / bytes_per_pixel; match samples_per_px { - 1 => {}, - 3 => {}, + 1 | 3 => {}, _ => return err_invalid_nr_samples } @@ -410,11 +411,282 @@ fn _decode_segment(enc: &[u8], out: &mut Vec) -> Result<(), Box> // RLE Encoding #[pyfunction] -fn encode_frame() {} +fn encode_frame<'a>( + enc: &[u8], rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char, py: Python<'a> +) -> PyResult<&'a PyBytes> { + let mut dst: Vec = Vec::new(); + match _encode_frame(enc, &mut dst, rows, cols, spp, bpp, byteorder) { + Ok(()) => return Ok(PyBytes::new(py, &dst[..])), + Err(err) => return Err(PyValueError::new_err(err.to_string())), + } +} + + +fn _encode_frame( + src: &[u8], dst: &mut Vec, rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char +) -> Result<(), Box> { + /* + + Parameters + ---------- + src + The data to be RLE encoded. + dst + + rows + The number of rows in the data. + cols + The number of columns in the data. + spp + The number of samples per pixel, supported values are 1 or 3. + bpp + The number of bits per pixel, supported values are 8, 16, 32 and 64. + byteorder + Required if bpp is greater than 1, '>' if `src` is in big endian byte + order, '<' if little endian. + + */ + let err_invalid_nr_samples = Err( + String::from("The (0028,0002) 'Samples per Pixel' must be 1 or 3").into() + ); + let err_bits_zero = Err( + String::from( + "The (0028,0010) 'Bits Allocated' value must be greater than 0" + ).into(), + ); + let err_bits_not_octal = Err( + String::from( + "The (0028,0010) 'Bits Allocated' value must be a multiple of 8" + ).into(), + ); + let err_invalid_bytes = Err( + String::from( + "A (0028,0010) 'Bits Allocated' value greater than 64 is not supported" + ).into() + ); + let err_invalid_nr_segments = Err( + String::from( + "Unable to encode as the DICOM Standard only allows \ + a maximum of 15 segments in RLE encoded data" + ).into() + ); + let err_invalid_parameters = Err( + String::from( + "The length of the data to be encoded is not consistent with the \ + the values of the dataset's 'Rows', 'Columns', 'Samples per Pixel' \ + and 'Bits Allocated' elements" + + ).into() + ); + let err_invalid_byteorder = Err( + String::from("Invalid byte order, must be '>' or '<'").into() + ); + + // Check Samples per Pixel + match spp { + 1 | 3 => {}, + _ => return err_invalid_nr_samples + } + + // Check Bits Allocated and byte ordering + match bpp { + 0 => return err_bits_zero, + _ => match bpp % 8 { + 0 => {}, + _ => return err_bits_not_octal + } + } + // Ensure `bytes_per_pixel` is in [1, 8] + let bytes_per_pixel: u8 = bpp / 8; + if bytes_per_pixel > 8 { return err_invalid_bytes } + + if bytes_per_pixel > 1 { + match byteorder { + '>' | '<' => {}, + _ => return err_invalid_byteorder + } + } + + println!( + "src len {}, r {}, c {}, spp {}, bpp {}, bo {}", + src.len(), rows, cols, spp, bpp, byteorder + ); + + // Ensure parameters are consistent + let r = usize::try_from(rows).unwrap(); + let c = usize::try_from(cols).unwrap(); + + if src.len() != r * c * usize::from(spp * bytes_per_pixel) { + return err_invalid_parameters + } + + let nr_segments: u8 = spp * bytes_per_pixel; + println!("Nr segs {}", nr_segments); + if nr_segments > 15 { return err_invalid_nr_segments } + + let is_big_endian: bool = byteorder == '>'; + println!("is big {}", is_big_endian); + + + // Big endian byte order '>' + // spp 3, bpp 2, 100x100: a = MSB, b = LSB + // R0a-0, R0b-0, G0a-0, G0b-0, B0a-0, B0a-0, ..., B99a-0, B99b-0, R0a-1, ... + + // Little endian byte order '<' + // spp 3, bpp 2, 100x100: a = MSB, b = LSB + // R0b-0, R0a-0, G0b-0, G0a-0, B0b-0, B0a-0, ..., B99b-0, B99a-0, R0b-1, ... + + // Need to be big endian u32s + //let mut header: Vec = vec![0; 16]; + dst.extend(u32::from(nr_segments).to_le_bytes().to_vec()); + dst.extend([0u8; 60].to_vec()); + println!("header {}", dst.len()); + // Big endian + let mut start = 0; + let process_step = usize::from(spp * bytes_per_pixel); + + let mut start: Vec = (0..usize::from(nr_segments)).collect(); + if !is_big_endian { + // Shuffle from BE to LE ordering + // This is ridiculous! + for ii in 0..usize::from(nr_segments / bytes_per_pixel) { + println!("{} {}", ii, bpp); + match bpp { + 8 => {}, + 16 => start.swap(ii * 2, ii * 2 + 1), + 32 => { + start.swap(ii * 2, ii * 2 + 3); + start.swap(ii * 2 + 1, ii * 2 + 2); + }, + 64 => { + start.swap(ii * 2, ii * 2 + 7); + start.swap(ii * 2 + 1, ii * 2 + 6); + start.swap(ii * 2 + 2, ii * 2 + 5); + start.swap(ii * 2 + 3, ii * 2 + 4); + }, + _ => { return err_invalid_bytes } + } + } + } + println!("starts {:?}", start); + + for idx in 0..usize::from(nr_segments) { + + // Convert to 4 big-endian ordered u8s + let offset = (u32::try_from(dst.len()).unwrap()).to_le_bytes(); + //println!("{:?}", offset); + for ii in idx * 4 + 4..idx * 4 + 8 { + // idx = 0: 4, 5, 6, 7 -> 0, 1, 2, 3 + // idx = 1: 8, 9, 10, 11 -> 0, 1, 2, 3 + dst[ii] = offset[ii - idx * 4 - 4]; + } + + println!("idx {}, ps {}, o {:?}", idx, process_step, offset); + // Correlate idx to which bytes to process as a segment + // spp 1, bpp 1 -> nr segments 1 -> process all + // spp 3, bpp 1 -> nr_segments 3 -> process every third then offset by 1 + // spp 1, bpp 2 -> nr_segments 2 -> process every second then offset by 1 + // spp 3, bpp 2 -> nr_segments 6 -> process every sixth then offset by 1 + // spp 3, bpp 4 -> nr_segments 12 -> process every twelfth then offset by 1 + + // Need to reverse order of offset if little endian + // i.e. 0 -> 1 -> 2 -> 3 becomes 3 -> 2 -> 1 -> 0 + + // need to offset src iter start by idx + let segment: Vec = src[start[idx]..].into_iter().step_by(process_step).cloned().collect(); + _encode_segment_b(segment, dst, cols); + //offsets.push(dst.len()); + } + println!("{:?}", &dst[..64]); + println!("{:?}", &dst[64..164]); + + Ok(()) +} + + +#[pyfunction] +fn encode_segment<'a>(src: &[u8], cols: u16, py: Python<'a>) -> PyResult<&'a PyBytes> { + let mut dst = Vec::new(); + match _encode_segment(src, &mut dst, cols) { + Ok(()) => return Ok(PyBytes::new(py, &dst[..])), + Err(err) => return Err(PyValueError::new_err(err.to_string())), + } +} + + +fn _encode_segment( + src: &[u8], dst: &mut Vec, cols: u16 +) -> Result<(), Box> { + /* + + Parameters + ---------- + src + The data to be encoded. + dst + The destination for the encoded data. + cols + The length of each row in the `src`. + */ + let err_invalid_length = Err( + String::from("The (0028,0011) 'Columns' value is invalid").into() + ); + let row_len: usize = usize::try_from(cols).unwrap(); + + if src.len() % row_len != 0 { return err_invalid_length } + + let nr_rows = src.len() / row_len; + let mut offset: usize = 0; + + for row_idx in 0..nr_rows { + offset = row_idx * row_len; + _encode_row(&src[offset..offset + row_len], dst); + } -fn _encode_segment() -> Result<(), Box> { // Each segment must be even length or padded to even length with noop byte + if dst.len() % 2 != 0 { + dst.push(128); + } + + Ok(()) +} + +fn _encode_segment_b( + src: Vec, dst: &mut Vec, cols: u16 +) -> Result<(), Box> { + /* + + Parameters + ---------- + src + The data to be encoded. + dst + The destination for the encoded data. + cols + The length of each row in the `src`. + */ + let err_invalid_length = Err( + String::from("The (0028,0011) 'Columns' value is invalid").into() + ); + + let row_len: usize = usize::try_from(cols).unwrap(); + + if src.len() % row_len != 0 { return err_invalid_length } + + let nr_rows = src.len() / row_len; + let mut offset: usize = 0; + + for row_idx in 0..nr_rows { + offset = row_idx * row_len; + _encode_row(&src[offset..offset + row_len], dst); + } + + // Each segment must be even length or padded to even length with noop byte + if dst.len() % 2 != 0 { + dst.push(128); + } + Ok(()) } @@ -433,8 +705,6 @@ fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { bytes The RLE encoded data. */ - // Be nice to make use of threading for row encoding - // Assuming all literal runs, `dst` can never be greater than // ceil(src.len() / 128) + src.len() @@ -454,15 +724,10 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { Parameters ---------- src - The data to be encoded, must contain at least 2 items. + The data to be encoded. dst The destination for the encoded data. */ - //let err_short_src = Err( - // String::from( - // "RLE encoding requires a segment row length of at least 2 bytes" - // ).into(), - //); // Reminders: // * Each image row is encoded separately @@ -479,12 +744,11 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // count: number of bytes in the literal stream (i8 = literal - 1) // a, b, c, ...: the literal stream - // No data to be encoded match src.len() { 0 => { return Ok(()) }, 1 => { - dst.push(0); - dst.push(0); + dst.push(0); // literal run + dst.push(src[0]); return Ok(()) }, _ => {} @@ -493,109 +757,80 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // Maximum length of a literal/replicate run let MAX_RUN: u8 = 128; let MAX_SRC = src.len(); - // Track how long the current literal run is - // TODO: Check if math is more efficient for i8 - let mut literal: u8 = 0; // Should get no larger than 128 - // Track how long the current replicate run is - let mut replicate: u8 = 0; // Should get no larger than 128 + + // `replicate` and `literal` are the length of the current run + let mut literal: u8 = 0; + let mut replicate: u8 = 0; let mut previous: u8 = src[0]; let mut current: u8 = src[1]; let mut ii: usize = 1; - // Replicate and literal count are the length of the current run - // Max is 128 // Account for the first item if current == previous { replicate = 1; } else { literal = 1; } - println!( - "Preloop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", - ii, MAX_SRC, previous, current, literal, replicate - ); - loop { current = src[ii]; - println!( - "Start of loop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", - ii, MAX_SRC, previous, current, literal, replicate - ); + //println!( + // "Start of loop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", + // ii, MAX_SRC, previous, current, literal, replicate + //); // Run type switching/control - // -------------------------- if current == previous { if literal == 1 { + // Switch over to a replicate run literal = 0; replicate = 1; } else if literal > 1 { + // Write out literal run and reset dst.push(literal - 1u8); - for idx in ii - usize::from(literal)..ii { - dst.push(src[idx]); - } + // Hmm, check the indexing here... + dst.extend(&src[ii - usize::from(literal)..ii]); literal = 0; } - // If switching to replicate run this is `previous` item replicate += 1; + } else { if replicate == 1 { + // Switch over to a literal run literal = 1; replicate = 0; } else if replicate > 1 { + // Write out replicate run and reset + // `replicate` must be at least 2 to avoid overflow dst.push(257u8.wrapping_sub(replicate)); + // Note use of previous item dst.push(previous); replicate = 0; } - - // If switching to literal run this is `previous` item literal += 1; } // If the run length is maxed, write out and reset if replicate == MAX_RUN { // Should be more frequent - // 128 byte run reached - println!(" Max replicate run reached"); // Write out replicate run and reset dst.push(129); dst.push(previous); replicate = 0; } else if literal == MAX_RUN { - println!("Max literal run reached"); // Write out literal run and reset dst.push(127); - for idx in ii + 1 - usize::from(literal)..ii + 1 { - dst.push(src[idx]); - } + // The indexing is different here because ii is the current item, not previous + dst.extend(&src[ii + 1 - usize::from(literal)..ii + 1]); literal = 0; } // 128 is noop! - // At this point 0 < run length < MAX_RUN, so loop - // -------------------------------------- - previous = current; - - - println!( - " End of loop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", - ii, MAX_SRC, previous, current, literal, replicate - ); - ii += 1; - // Break if `current` is the last byte of the src if ii == MAX_SRC { break; } } - println!( - " Post-loop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", - ii, MAX_SRC, previous, current, literal, replicate - ); - - // No more source data, finish up the last write operation - - // Handle cases where the last byte is part of a replicate run + // Handle cases where the last byte is continuation of a replicate run // such as when 129 0x00 bytes -> replicate(128) + literal(1) - // Handle cases where replicate run is 0x00 0x00 -> replicate(2) if replicate == 1 { replicate = 0; literal = 1; @@ -603,28 +838,16 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { if replicate > 1 { // Write out and return - // `replicate` must be at least 1 or we overflow - // Eh, the math is out somewhere... - // 1 -> copy 2 - // 129 to 255 -> replicate run + // `replicate` must be at least 2 or we overflow dst.push(257u8.wrapping_sub(replicate)); dst.push(current); - // else if replicate == 1 do what? } else if literal > 0 { // Write out and return // `literal` must be at least 1 or we undeflow - // 0 to 127 -> literal run dst.push(literal - 1u8); - for idx in (MAX_SRC - usize::from(literal))..MAX_SRC { - dst.push(src[idx]); - } + dst.extend(&src[MAX_SRC - usize::from(literal)..]); } - println!( - " Return - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", - ii, MAX_SRC, previous, current, literal, replicate - ); - Ok(()) } @@ -636,6 +859,7 @@ fn _rle(_: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(decode_frame, m)?).unwrap(); m.add_function(wrap_pyfunction!(encode_row, m)?).unwrap(); + m.add_function(wrap_pyfunction!(encode_segment, m)?).unwrap(); m.add_function(wrap_pyfunction!(encode_frame, m)?).unwrap(); Ok(()) From d5dc7238b8ba41ec6c1ced7eb86db499fe15c42d Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sun, 25 Apr 2021 18:52:13 +1000 Subject: [PATCH 13/19] Add tests for frame encoding --- Cargo.lock | 117 -------- Cargo.toml | 1 - rle/tests/test_decode.py | 10 +- rle/tests/test_encode.py | 627 +++++++++++---------------------------- rle/utils.py | 100 ++++++- src/lib.rs | 182 ++++-------- 6 files changed, 343 insertions(+), 694 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 97cbfe2..6f8e556 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,15 +5,8 @@ name = "_rle" version = "1.0.0" dependencies = [ "pyo3", - "rayon", ] -[[package]] -name = "autocfg" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" - [[package]] name = "bitflags" version = "1.2.1" @@ -26,51 +19,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "crossbeam-channel" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" -dependencies = [ - "cfg-if", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2584f639eb95fea8c798496315b297cf81b9b58b6d30ab066a75455333cf4b12" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "lazy_static", - "memoffset", - "scopeguard", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7e9d99fa91428effe99c5c6d4634cdeba32b8cf784fc428a2a687f61a952c49" -dependencies = [ - "autocfg", - "cfg-if", - "lazy_static", -] - [[package]] name = "ctor" version = "0.1.20" @@ -81,12 +29,6 @@ dependencies = [ "syn", ] -[[package]] -name = "either" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" - [[package]] name = "ghost" version = "0.1.2" @@ -98,15 +40,6 @@ dependencies = [ "syn", ] -[[package]] -name = "hermit-abi" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" -dependencies = [ - "libc", -] - [[package]] name = "indoc" version = "0.3.6" @@ -161,12 +94,6 @@ dependencies = [ "syn", ] -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - [[package]] name = "libc" version = "0.2.93" @@ -182,25 +109,6 @@ dependencies = [ "scopeguard", ] -[[package]] -name = "memoffset" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83fb6581e8ed1f85fd45c116db8405483899489e38406156c25eb743554361d" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "parking_lot" version = "0.11.1" @@ -308,31 +216,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rayon" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "lazy_static", - "num_cpus", -] - [[package]] name = "redox_syscall" version = "0.2.6" diff --git a/Cargo.toml b/Cargo.toml index 8612fc4..032c909 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,4 +12,3 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.13.1", features = ["extension-module"] } -rayon = { version = "1.5.0" } diff --git a/rle/tests/test_decode.py b/rle/tests/test_decode.py index 0712409..f0063bc 100644 --- a/rle/tests/test_decode.py +++ b/rle/tests/test_decode.py @@ -27,10 +27,6 @@ INDEX = get_indexed_datasets('1.2.840.10008.1.2.5') -REF = [ - ('MR_small_RLE.dcm', 2, (64, 1948)), -] - HEADER_DATA = [ # (Number of segments, offsets) @@ -78,7 +74,7 @@ def as_bytes(self, offsets): def test_bits_allocated_zero_raises(self): """Test exception raised for BitsAllocated 0.""" msg = ( - r"The \(0028,0010\) 'Bits Allocated' value must be greater than 0" + r"The \(0028,0100\) 'Bits Allocated' value must be greater than 0" ) with pytest.raises(ValueError, match=msg): decode_frame(b'\x00\x00\x00\x00', 1, 0) @@ -86,7 +82,7 @@ def test_bits_allocated_zero_raises(self): def test_bits_allocated_not_octal_raises(self): """Test exception raised for BitsAllocated not a multiple of 8.""" msg = ( - r"The \(0028,0010\) 'Bits Allocated' value must be a multiple of 8" + r"The \(0028,0100\) 'Bits Allocated' value must be a multiple of 8" ) with pytest.raises(ValueError, match=msg): decode_frame(b'\x00\x00\x00\x00', 1, 12) @@ -94,7 +90,7 @@ def test_bits_allocated_not_octal_raises(self): def test_bits_allocated_large_raises(self): """Test exception raised for BitsAllocated greater than 64.""" msg = ( - r"A \(0028,0010\) 'Bits Allocated' value greater than " + r"A \(0028,0100\) 'Bits Allocated' value greater than " r"64 is not supported" ) with pytest.raises(ValueError, match=msg): diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py index 35a2710..22e44d5 100644 --- a/rle/tests/test_encode.py +++ b/rle/tests/test_encode.py @@ -4,12 +4,23 @@ import pytest import sys -from pydicom import dcmread -from pydicom.data import get_testdata_file -from pydicom.encaps import defragment_data -from pydicom.pixel_data_handlers.util import ( - pixel_dtype, reshape_pixel_array -) + +try: + from pydicom import dcmread + from pydicom.data import get_testdata_file + from pydicom.dataset import Dataset, FileMetaDataset + from pydicom.encaps import generate_pixel_data_frame, defragment_data + from pydicom.pixel_data_handlers.rle_handler import ( + _parse_rle_header, _rle_decode_frame, _rle_decode_segment + ) + from pydicom.pixel_data_handlers.util import ( + pixel_dtype, reshape_pixel_array + ) + from pydicom.uid import RLELossless + HAVE_PYDICOM = True +except ImportError: + HAVE_PYDICOM = False + from rle.data import get_indexed_datasets from rle._rle import ( @@ -17,7 +28,9 @@ ) -INDEX = get_indexed_datasets('1.2.840.10008.1.2.5') +INDEX_RLE = get_indexed_datasets('1.2.840.10008.1.2.5') +INDEX_LEI = get_indexed_datasets('1.2.840.10008.1.2') +INDEX_LEE = get_indexed_datasets('1.2.840.10008.1.2.1') # Tests for RLE encoding @@ -106,118 +119,11 @@ def test_encode(self, row, ref): assert ref == encode_row(row.tobytes()) -def test_encode_segment(): - data = bytes([0] * 128 + [1, 2] * 64) * 20 - #data = np.asarray(data, dtype='uint8').tobytes() - result = encode_segment(data, 512) - print(len(data)) - print(data[:50]) - print(result[:50]) - out = decode_segment(result) - assert data == out - - -def test_encode_frame(): - # u16, little endian, 3 spp - #ds = dcmread(get_testdata_file("SC_rgb_16bit.dcm")) - # u16, big endian, 3 spp, - ds = INDEX["SC_rgb_rle_16bit.dcm"]['ds'] - #b = defragment_data(ds.PixelData) - #print(' '.join([f"{ii}" for ii in b[:64]])) - arr_be_a = ds.pixel_array - arr_be_a[0, 0, 0] = 10 - byteorder = arr_be_a.dtype.byteorder - - # TODO: Add to utils function - # maybe check whether C or F ordering too? - if byteorder == '=': - byteorder = '<' if sys.byteorder == "little" else '>' - - print(arr_be_a.tobytes()[:120]) - - frame_be = encode_frame( - arr_be_a.tobytes(), - ds.Rows, - ds.Columns, - ds.SamplesPerPixel, - ds.BitsAllocated, - byteorder - ) - - out_be = decode_frame(frame_be, ds.Rows * ds.Columns, ds.BitsAllocated) - dtype = pixel_dtype(ds).newbyteorder('>') - arr_be = np.frombuffer(out_be, dtype) - arr_be = reshape_pixel_array(ds, arr_be) - - - ds = dcmread(get_testdata_file("SC_rgb_16bit.dcm")) - arr_le_a = ds.pixel_array - arr_le_a[0, 0, 0] = 10 - - byteorder = arr_le_a.dtype.byteorder - if byteorder == '=': - byteorder = '<' if sys.byteorder == "little" else '>' - - print(arr_le_a.tobytes()[:120]) - - frame_le = encode_frame( - arr_le_a.tobytes(), - ds.Rows, - ds.Columns, - ds.SamplesPerPixel, - ds.BitsAllocated, - byteorder - ) - - out_le = decode_frame(frame_le, ds.Rows * ds.Columns, ds.BitsAllocated) - dtype = pixel_dtype(ds).newbyteorder('>') - arr_le = np.frombuffer(out_le, dtype) - - if ds.SamplesPerPixel == 1: - arr_le = arr_le.reshape(ds.Rows, ds.Columns) - else: - # RLE is planar configuration 1 - arr_le = np.reshape(arr_le, (ds.SamplesPerPixel, ds.Rows, ds.Columns)) - arr_le = arr_le.transpose(1, 2, 0) - - - import matplotlib.pyplot as plt - fig, (ax1, ax2) = plt.subplots(1, 2) - ax1.imshow(arr_be) - ax2.imshow(arr_le) - plt.show() - - assert out_le == out_be - - - -def test_numpy(): - #ds = INDEX["SC_rgb_rle_16bit.dcm"]['ds'] - ds = dcmread(get_testdata_file("SC_rgb_16bit.dcm")) - arr = ds.pixel_array - arr[0, 0, 0] = 10 - print(arr) - print(arr.tobytes()[:20]) - - # [Channel][Column index][byte order]-[Row index] - # spp 3, bpp 1, 100x100 - # R0-0, G0-0, B0-0, ..., B99-0, R0-1, G0-1, B0-1, ..., B99-1, ... - - # Big endian byte order - # spp 3, bpp 2, 100x100: a = MSB, b = LSB - # R0a-0, R0b-0, G0a-0, G0b-0, B0a-0, B0a-0, ..., B99a-0, B99b-0, R0a-1, ... - - # Little endian byte order - # spp 3, bpp 2, 100x100: a = MSB, b = LSB - # R0b-0, R0a-0, G0b-0, G0a-0, B0b-0, B0a-0, ..., B99b-0, B99a-0, R0b-1, ... - - - class TestEncodeSegment: """Tests for _rle.encode_segment.""" def test_one_row(self): """Test encoding data that contains only a single row.""" - ds = INDEX["OBXXXX1A_rle.dcm"]['ds'] + ds = INDEX_RLE["OBXXXX1A_rle.dcm"]['ds'] pixel_data = defragment_data(ds.PixelData) decoded = decode_segment(pixel_data[64:]) assert ds.Rows * ds.Columns == len(decoded) @@ -235,7 +141,7 @@ def test_one_row(self): def test_cycle(self): """Test the decoded data remains the same after encoding/decoding.""" - ds = INDEX["OBXXXX1A_rle.dcm"]['ds'] + ds = INDEX_RLE["OBXXXX1A_rle.dcm"]['ds'] pixel_data = defragment_data(ds.PixelData) decoded = decode_segment(pixel_data[64:]) assert ds.Rows * ds.Columns == len(decoded) @@ -249,222 +155,73 @@ def test_cycle(self): assert decoded == redecoded -@pytest.mark.skip() -class TestEncodePlane: - """Tests for rle_handler._rle_encode_plane.""" - def test_8bit(self): - """Test encoding an 8-bit plane into 1 segment.""" - ds = dcmread(RLE_8_1_1F) - pixel_data = defragment_data(ds.PixelData) - decoded = _rle_decode_frame(pixel_data, ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - assert ds.Rows * ds.Columns * ds.BitsAllocated // 8 == len(decoded) - arr = np.frombuffer(decoded, 'uint8').reshape(ds.Rows, ds.Columns) - # Re-encode the decoded data - encoded = bytearray() - nr_segments = 0 - for segment in _rle_encode_plane(arr): - encoded.extend(segment) - nr_segments += 1 - - # Add header - header = b'\x01\x00\x00\x00\x40\x00\x00\x00' - header += b'\x00' * (64 - len(header)) - - assert 1 == nr_segments - - # Decode the re-encoded data and check that it's the same - redecoded = _rle_decode_frame(header + encoded, - ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - assert ds.Rows * ds.Columns * ds.SamplesPerPixel == len(redecoded) - assert decoded == redecoded - - def test_16bit(self): - """Test encoding a 16-bit plane into 2 segments.""" - ds = dcmread(RLE_16_1_1F) - pixel_data = defragment_data(ds.PixelData) - decoded = _rle_decode_frame(pixel_data, ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - assert ds.Rows * ds.Columns * ds.BitsAllocated // 8 == len(decoded) - - # `decoded` is in big endian byte ordering - dtype = np.dtype('uint16').newbyteorder('>') - arr = np.frombuffer(decoded, dtype).reshape(ds.Rows, ds.Columns) - - # Re-encode the decoded data - encoded = bytearray() - nr_segments = 0 - offsets = [64] - for segment in _rle_encode_plane(arr): - offsets.append(offsets[nr_segments] + len(segment)) - encoded.extend(segment) - nr_segments += 1 - - assert 2 == nr_segments - - # Add header - header = b'\x02\x00\x00\x00' - header += pack('<2L', *offsets[:-1]) - header += b'\x00' * (64 - len(header)) - - # Decode the re-encoded data and check that it's the same - redecoded = _rle_decode_frame(header + encoded, - ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - assert ds.Rows * ds.Columns * ds.BitsAllocated // 8 == len(redecoded) - assert decoded == redecoded - - def test_16bit_segment_order(self): - """Test that the segment order per 16-bit sample is correct.""" - # Native byte ordering - data = b'\x00\x00\x01\xFF\xFE\x00\xFF\xFF\x10\x12' - dtype = np.dtype('uint16') - arr = np.frombuffer(data, dtype) - - segments = [] - for segment in _rle_encode_plane(arr): - segments.append(segment) - - assert 2 == len(segments) - - # Each segment should start with a literal run marker of 0x04 - # and MSB should be first segment, then LSB in second - if sys.byteorder == 'little': - assert b'\x04\x00\xFF\x00\xFF\x12' == segments[0] - assert b'\x04\x00\x01\xFE\xFF\x10' == segments[1] - else: - assert b'\x04\x00\x01\xFE\xFF\x10' == segments[0] - assert b'\x04\x00\xFF\x00\xFF\x12' == segments[1] - - # Little endian - arr = np.frombuffer(data, dtype.newbyteorder('<')) - assert [0, 65281, 254, 65535, 4624] == arr.tolist() - - segments = [] - for segment in _rle_encode_plane(arr): - segments.append(segment) - - assert 2 == len(segments) - assert b'\x04\x00\xFF\x00\xFF\x12' == segments[0] - assert b'\x04\x00\x01\xFE\xFF\x10' == segments[1] - - # Big endian - arr = np.frombuffer(data, dtype.newbyteorder('>')) - assert [0, 511, 65024, 65535, 4114] == arr.tolist() - - segments = [] - for segment in _rle_encode_plane(arr): - segments.append(segment) - - assert 2 == len(segments) - assert b'\x04\x00\x01\xFE\xFF\x10' == segments[0] - assert b'\x04\x00\xFF\x00\xFF\x12' == segments[1] - - def test_32bit(self): - """Test encoding a 32-bit plane into 4 segments.""" - ds = dcmread(RLE_32_1_1F) - pixel_data = defragment_data(ds.PixelData) - decoded = _rle_decode_frame(pixel_data, ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - assert ds.Rows * ds.Columns * ds.BitsAllocated // 8 == len(decoded) - - # `decoded` is in big endian byte ordering - dtype = np.dtype('uint32').newbyteorder('>') - arr = np.frombuffer(decoded, dtype).reshape(ds.Rows, ds.Columns) - - # Re-encode the decoded data - encoded = bytearray() - nr_segments = 0 - offsets = [64] - for segment in _rle_encode_plane(arr): - offsets.append(offsets[nr_segments] + len(segment)) - encoded.extend(segment) - nr_segments += 1 - - assert 4 == nr_segments - - # Add header - header = b'\x04\x00\x00\x00' - header += pack('<4L', *offsets[:-1]) - header += b'\x00' * (64 - len(header)) - - # Decode the re-encoded data and check that it's the same - redecoded = _rle_decode_frame(header + encoded, - ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - assert ds.Rows * ds.Columns * ds.BitsAllocated // 8 == len(redecoded) - assert decoded == redecoded - - def test_32bit_segment_order(self): - """Test that the segment order per 32-bit sample is correct.""" - # Native byte ordering - data = b'\x00\x00\x00\x00\x01\xFF\xFE\x0A\xFF\xFC\x10\x12' - dtype = np.dtype('uint32') - arr = np.frombuffer(data, dtype) - - segments = [] - for segment in _rle_encode_plane(arr): - segments.append(segment) - - assert 4 == len(segments) - - # Each segment should start with a literal run marker of 0x02 - if sys.byteorder == 'little': - assert b'\x02\x00\x0A\x12' == segments[0] - assert b'\x02\x00\xFE\x10' == segments[1] - assert b'\x02\x00\xFF\xFC' == segments[2] - assert b'\x02\x00\x01\xFF' == segments[3] - else: - assert b'\x02\x00\x01\xFF' == segments[0] - assert b'\x02\x00\xFF\xFC' == segments[1] - assert b'\x02\x00\xFE\x10' == segments[2] - assert b'\x02\x00\x0A\x12' == segments[3] - - # Little endian - arr = np.frombuffer(data, dtype.newbyteorder('<')) - assert [0, 184483585, 303103231] == arr.tolist() - - segments = [] - for segment in _rle_encode_plane(arr): - segments.append(segment) - - assert 4 == len(segments) - assert b'\x02\x00\x0A\x12' == segments[0] - assert b'\x02\x00\xFE\x10' == segments[1] - assert b'\x02\x00\xFF\xFC' == segments[2] - assert b'\x02\x00\x01\xFF' == segments[3] - - # Big endian - arr = np.frombuffer(data, dtype.newbyteorder('>')) - assert [0, 33553930, 4294709266] == arr.tolist() - - segments = [] - for segment in _rle_encode_plane(arr): - segments.append(segment) - - assert 4 == len(segments) - assert b'\x02\x00\x01\xFF' == segments[0] - assert b'\x02\x00\xFF\xFC' == segments[1] - assert b'\x02\x00\xFE\x10' == segments[2] - assert b'\x02\x00\x0A\x12' == segments[3] - - def test_padding(self): - """Test that odd length encoded segments are padded.""" - data = b'\x00\x04\x01\x15' - arr = np.frombuffer(data, 'uint8') - segments = [] - for segment in _rle_encode_plane(arr): - segments.append(segment) - - # The segment should start with a literal run marker of 0x03 - # then 4 bytes of RLE encoded data, then 0x00 padding - assert b'\x03\x00\x04\x01\x15\x00' == segments[0] +RLE_MATCHING_DATASETS = [ + # (compressed, reference, nr frames) + pytest.param( + INDEX_RLE["OBXXXX1A_rle.dcm"]['ds'], + INDEX_LEE["OBXXXX1A.dcm"]['ds'], + 1 + ), + pytest.param( + INDEX_RLE["OBXXXX1A_rle_2frame.dcm"]['ds'], + INDEX_LEE["OBXXXX1A_2frame.dcm"]['ds'], + 2 + ), + pytest.param( + INDEX_RLE["SC_rgb_rle.dcm"]['ds'], + INDEX_LEE["SC_rgb.dcm"]['ds'], + 1 + ), + pytest.param( + INDEX_RLE["SC_rgb_rle_2frame.dcm"]['ds'], + INDEX_LEE["SC_rgb_2frame.dcm"]['ds'], + 2 + ), + pytest.param( + INDEX_RLE["MR_small_RLE.dcm"]['ds'], + INDEX_LEE["MR_small.dcm"]['ds'], + 1 + ), + pytest.param( + INDEX_RLE["emri_small_RLE.dcm"]['ds'], + INDEX_LEE["emri_small.dcm"]['ds'], + 10 + ), + pytest.param( + INDEX_RLE["SC_rgb_rle_16bit.dcm"]['ds'], + INDEX_LEE["SC_rgb_16bit.dcm"]['ds'], + 1 + ), + pytest.param( + INDEX_RLE["SC_rgb_rle_16bit_2frame.dcm"]['ds'], + INDEX_LEE["SC_rgb_16bit_2frame.dcm"]['ds'], + 2 + ), + pytest.param( + INDEX_RLE["rtdose_rle_1frame.dcm"]['ds'], + INDEX_LEI["rtdose_1frame.dcm"]['ds'], + 1 + ), + pytest.param( + INDEX_RLE["rtdose_rle.dcm"]['ds'], + INDEX_LEI["rtdose.dcm"]['ds'], + 15 + ), + pytest.param( + INDEX_RLE["SC_rgb_rle_32bit.dcm"]['ds'], + INDEX_LEE["SC_rgb_32bit.dcm"]['ds'], + 1 + ), + pytest.param( + INDEX_RLE["SC_rgb_rle_32bit_2frame.dcm"]['ds'], + INDEX_LEE["SC_rgb_32bit_2frame.dcm"]['ds'], + 2 + ), +] -@pytest.mark.skip() -class TestNumpy_RLEEncodeFrame: - """Tests for rle_handler.rle_encode_frame.""" +class TestEncodeFrame: + """Tests for _rle.encode_frame.""" def setup(self): """Setup the tests.""" # Create a dataset skeleton for use in the cycle tests @@ -477,132 +234,56 @@ def setup(self): ds.PlanarConfiguration = 1 self.ds = ds - def test_cycle_8bit_1sample(self): - """Test an encode/decode cycle for 8-bit 1 sample/pixel.""" - ds = dcmread(EXPL_8_1_1F) - ref = ds.pixel_array - assert 8 == ds.BitsAllocated - assert 1 == ds.SamplesPerPixel - - encoded = rle_encode_frame(ref) - decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - dtype = np.dtype('uint8').newbyteorder('>') - arr = np.frombuffer(decoded, dtype) - arr = reshape_pixel_array(ds, arr) - - assert np.array_equal(ref, arr) - - def test_cycle_8bit_3sample(self): - """Test an encode/decode cycle for 8-bit 3 sample/pixel.""" - ds = dcmread(EXPL_8_3_1F) - ref = ds.pixel_array - assert 8 == ds.BitsAllocated - assert 3 == ds.SamplesPerPixel - - encoded = rle_encode_frame(ref) - decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - arr = np.frombuffer(decoded, 'uint8') - # The decoded data is planar configuration 1 - ds.PlanarConfiguration = 1 - arr = reshape_pixel_array(ds, arr) - - assert np.array_equal(ref, arr) + @pytest.mark.skipif(not HAVE_PYDICOM, reason="No pydicom") + @pytest.mark.parametrize("_, ds, nr_frames", RLE_MATCHING_DATASETS) + def test_cycle(self, _, ds, nr_frames): + """Encode pixel data, then decode it and compare against original""" + if nr_frames > 1: + return - def test_cycle_16bit_1sample(self): - """Test an encode/decode cycle for 16-bit 1 sample/pixel.""" - ds = dcmread(EXPL_16_1_1F) - ref = ds.pixel_array - assert 16 == ds.BitsAllocated - assert 1 == ds.SamplesPerPixel + params = (ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated) + encoded = encode_frame(ds.PixelData, *params, '<') + decoded = _rle_decode_frame(encoded, *params) - encoded = rle_encode_frame(ref) - decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - dtype = np.dtype('uint16').newbyteorder('>') + dtype = pixel_dtype(ds).newbyteorder('>') arr = np.frombuffer(decoded, dtype) - arr = reshape_pixel_array(ds, arr) - assert np.array_equal(ref, arr) - - def test_cycle_16bit_3sample(self): - """Test an encode/decode cycle for 16-bit 3 sample/pixel.""" - ds = dcmread(EXPL_16_3_1F) - ref = ds.pixel_array - assert 16 == ds.BitsAllocated - assert 3 == ds.SamplesPerPixel - - encoded = rle_encode_frame(ref) - decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - dtype = np.dtype('uint16').newbyteorder('>') - arr = np.frombuffer(decoded, dtype) - # The decoded data is planar configuration 1 - ds.PlanarConfiguration = 1 - arr = reshape_pixel_array(ds, arr) - - assert np.array_equal(ref, arr) - - def test_cycle_32bit_1sample(self): - """Test an encode/decode cycle for 32-bit 1 sample/pixel.""" - ds = dcmread(EXPL_32_1_1F) - ref = ds.pixel_array - assert 32 == ds.BitsAllocated - assert 1 == ds.SamplesPerPixel - - encoded = rle_encode_frame(ref) - decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - dtype = np.dtype('uint32').newbyteorder('>') - arr = np.frombuffer(decoded, dtype) - arr = reshape_pixel_array(ds, arr) - - assert np.array_equal(ref, arr) - - def test_cycle_32bit_3sample(self): - """Test an encode/decode cycle for 32-bit 3 sample/pixel.""" - ds = dcmread(EXPL_32_3_1F) - ref = ds.pixel_array - assert 32 == ds.BitsAllocated - assert 3 == ds.SamplesPerPixel - - encoded = rle_encode_frame(ref) - decoded = _rle_decode_frame(encoded, ds.Rows, ds.Columns, - ds.SamplesPerPixel, ds.BitsAllocated) - dtype = np.dtype('uint32').newbyteorder('>') - arr = np.frombuffer(decoded, dtype) - # The decoded data is planar configuration 1 - ds.PlanarConfiguration = 1 - arr = reshape_pixel_array(ds, arr) + if ds.SamplesPerPixel == 1: + arr = arr.reshape(ds.Rows, ds.Columns) + else: + # RLE is planar configuration 1 + arr = np.reshape(arr, (ds.SamplesPerPixel, ds.Rows, ds.Columns)) + arr = arr.transpose(1, 2, 0) - assert np.array_equal(ref, arr) + assert np.array_equal(ds.pixel_array, arr) def test_16_segments_raises(self): """Test that trying to encode 16-segments raises exception.""" - arr = np.asarray([[[1, 2, 3, 4]]], dtype='uint32') - assert (1, 1, 4) == arr.shape - assert 4 == arr.dtype.itemsize + arr = np.asarray([[[1, 2, 3]]], dtype='uint64') + assert (1, 1, 3) == arr.shape + assert 8 == arr.dtype.itemsize msg = ( - r"Unable to encode as the DICOM standard only allows " + r"Unable to encode as the DICOM Standard only allows " r"a maximum of 15 segments in RLE encoded data" ) with pytest.raises(ValueError, match=msg): - rle_encode_frame(arr) + encode_frame(arr.tobytes(), 1, 1, 3, 64, '<') - def test_15_segment(self): - """Test encoding 15-segments works as expected.""" + def test_12_segment(self): + """Test encoding 12-segments works as expected.""" + # Most that can be done w/ -rle arr = np.asarray( - [[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]], - dtype='uint8' + [[[1, 2, 3]]], + dtype='uint32' ) - assert (1, 1, 15) == arr.shape - assert 1 == arr.dtype.itemsize + # 00 00 00 01 | 00 00 00 02 | 00 00 00 03 + assert (1, 1, 3) == arr.shape + assert 4 == arr.dtype.itemsize - encoded = rle_encode_frame(arr) + encoded = encode_frame(arr.tobytes(), 1, 1, 3, 32, '<') header = ( - b'\x0f\x00\x00\x00' + b'\x0c\x00\x00\x00' b'\x40\x00\x00\x00' b'\x42\x00\x00\x00' b'\x44\x00\x00\x00' @@ -615,37 +296,41 @@ def test_15_segment(self): b'\x52\x00\x00\x00' b'\x54\x00\x00\x00' b'\x56\x00\x00\x00' - b'\x58\x00\x00\x00' - b'\x5a\x00\x00\x00' - b'\x5c\x00\x00\x00' + b'\x00\x00\x00\x00' + b'\x00\x00\x00\x00' + b'\x00\x00\x00\x00' ) assert header == encoded[:64] assert ( - b'\x00\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06' - b'\x00\x07\x00\x08\x00\x09\x00\x0a\x00\x0b\x00\x0c' - b'\x00\x0d\x00\x0e\x00\x0f' + b'\x00\x00\x00\x00\x00\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x02' + b'\x00\x00\x00\x00\x00\x00\x00\x03' ) == encoded[64:] + @pytest.mark.skipif(not HAVE_PYDICOM, reason="No pydicom") def test_encoding_multiple_frames_raises(self): """Test encoding multiple framed pixel data raises exception.""" # Note: only works with multi-sample data - ds = dcmread(EXPL_8_3_2F) + ds = INDEX_LEE["SC_rgb_2frame.dcm"]['ds'] arr = ds.pixel_array assert ds.NumberOfFrames > 1 assert len(arr.shape) == 4 + params = (ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated) + msg = ( - r"Unable to encode multiple frames at once, please encode one " - r"frame at a time" + r"The length of the data to be encoded is not consistent with the " + r"the values of the dataset's 'Rows', 'Columns', 'Samples per " + r"Pixel' and 'Bits Allocated' elements" ) with pytest.raises(ValueError, match=msg): - rle_encode_frame(arr) + encode_frame(arr.tobytes(), *params, '<') def test_single_row_1sample(self): """Test encoding a single row of 1 sample/pixel data.""" # Rows 1, Columns 5, SamplesPerPixel 1 arr = np.asarray([[0, 1, 2, 3, 4]], dtype='uint8') assert (1, 5) == arr.shape - encoded = rle_encode_frame(arr) + encoded = encode_frame(arr.tobytes(), 1, 5, 1, 8, '<') header = b'\x01\x00\x00\x00\x40\x00\x00\x00' + b'\x00' * 56 assert header == encoded[:64] assert b'\x04\x00\x01\x02\x03\x04' == encoded[64:] @@ -658,7 +343,7 @@ def test_single_row_3sample(self): dtype='uint8' ) assert (1, 5, 3) == arr.shape - encoded = rle_encode_frame(arr) + encoded = encode_frame(arr.tobytes(), 1, 5, 3, 8, '<') header = ( b'\x03\x00\x00\x00' b'\x40\x00\x00\x00' @@ -672,3 +357,45 @@ def test_single_row_3sample(self): b'\x04\x00\x01\x02\x03\x04' b'\x04\x00\x01\x02\x03\x04' ) == encoded[64:] + + def test_padding(self): + """Test that odd length encoded segments are padded.""" + data = b'\x00\x04\x01\x15' + out = encode_frame(data, 1, 4, 1, 8, '<') + + # The segment should start with a literal run marker of 0x03 + # then 4 bytes of RLE encoded data, then 0x00 padding + assert b'\x03\x00\x04\x01\x15\x00' == out[64:] + + def test_16bit_segment_order(self): + """Test that the segment order per 16-bit sample is correct.""" + # Native byte ordering + data = b'\x00\x00\x01\xFF\xFE\x00\xFF\xFF\x10\x12' + + # Test little endian input + out = encode_frame(data, 1, 5, 1, 16, '<') + assert b'\x04\x00\xFF\x00\xFF\x12' == out[64:70] + assert b'\x04\x00\x01\xFE\xFF\x10' == out[70:76] + + # Test big endian input + out = encode_frame(data, 1, 5, 1, 16, '>') + assert b'\x04\x00\x01\xFE\xFF\x10' == out[64:70] + assert b'\x04\x00\xFF\x00\xFF\x12' == out[70:76] + + def test_32bit_segment_order(self): + """Test that the segment order per 32-bit sample is correct.""" + data = b'\x00\x00\x00\x00\x01\xFF\xFE\x0A\xFF\xFC\x10\x12' + + # Test little endian input + out = encode_frame(data, 1, 3, 1, 32, '<') + assert b'\x02\x00\x0A\x12' == out[64:68] + assert b'\x02\x00\xFE\x10' == out[68:72] + assert b'\x02\x00\xFF\xFC' == out[72:76] + assert b'\x02\x00\x01\xFF' == out[76:80] + + # Test big endian input + out = encode_frame(data, 1, 3, 1, 32, '>') + assert b'\x02\x00\x01\xFF' == out[64:68] + assert b'\x02\x00\xFF\xFC' == out[68:72] + assert b'\x02\x00\xFE\x10' == out[72:76] + assert b'\x02\x00\x0A\x12' == out[76:80] diff --git a/rle/utils.py b/rle/utils.py index 69b37d3..c8f8f78 100644 --- a/rle/utils.py +++ b/rle/utils.py @@ -1,9 +1,11 @@ """Utility functions.""" +import sys +from typing import Generator import numpy as np -from rle._rle import decode_frame, decode_segment, parse_header +from rle._rle import decode_frame, decode_segment, encode_frame, encode_segment def decode_pixel_data(stream: bytes, ds: "Dataset") -> "np.ndarray": @@ -36,6 +38,102 @@ def decode_pixel_data(stream: bytes, ds: "Dataset") -> "np.ndarray": ) +def encode_array(arr: "np.ndarray", **kwargs) -> Generator[bytes, None, None]: + """Yield RLE encoded frames from `arr`. + + Parameters + ---------- + arr : numpy.ndarray + The array of data to be RLE encoded + kwargs : dict + A dictionary containing keyword arguments. Required keys are either: + + * ``{'ds': pydicom.dataset.Dataset}``, which is the corresponding + dataset, or + * ``{'rows': int, 'columns': int, samples_per_px': int, + 'bits_per_px': int}``. + + Yields + ------ + bytes + An RLE encoded frame from `arr`. + """ + byteorder = arr.dtype.byteorder + if byteorder == '=': + byteorder = '<' if sys.byteorder == "little" else '>' + + kwargs['byteorder'] = byteorder + + if getattr(ds, "NumberOfFrames", 1) > 1: + for frame in arr: + yield encode_pixel_data(frame.tobytes(), kwargs) + else: + yield encode_pixel_data(arr.tobytes(), kwargs) + + +def encode_pixel_data(stream: bytes, **kwargs) -> bytes: + """Return `stream` encoded using the DICOM RLE (PackBits) algorithm. + + .. warning:: + + *Samples per Pixel* x *Bits Allocated* must be less than or equal + to 15 in order to meet the requirements of the *RLE Lossless* + transfer syntax. + + Parameters + ---------- + stream : bytes + The image frame data to be RLE encoded. Only 1 frame can be encoded + at a time. + kwargs : dict + A dictionary containing keyword arguments. Required keys are: + + * ``{'byteorder': str}``, required if the number of samples per + pixel is greater than 1. If `stream` is in little-endian byte order + then ``'<'``, otherwise ``'>'`` for big-endian. + + And either: + + * ``{'ds': pydicom.dataset.Dataset}``, which is the dataset + corresponding to `stream` with matching values for *Rows*, *Columns*, + *Samples per Pixel* and *Bits Allocated*, or + * ``{'rows': int, 'columns': int, samples_per_px': int, + 'bits_per_px': int}``. + + Returns + ------- + bytes + The RLE encoded frame. + """ + if 'ds' in kwargs: + r, c = ds.Rows, ds.Columns + bpp = ds.BitsAllocated + spp = ds.SamplesPerPixel + else: + r, c = kwargs['rows'], kwargs['columns'] + bpp = kwargs['bits_per_pixel'] + spp = kwargs['samples_per_px'] + + # Validate input + if bpp not in [8, 16, 32, 64]: + raise NotImplementedError("'Bits Allocated' must be 8, 16, 32 or 64") + + if spp not in [1, 3]: + raise ValueError("'Samples per Pixel' must be 1 or 3") + + if bpp * spp > 15: + raise ValueError( + "Unable to encode the data as the DICOM Standard blah blah" + ) + + if len(stream) != (r * c * bpp / 8 * spp): + raise ValueError( + "The length of the data doesn't not match" + ) + + return encode_frame(stream, r, c, spp, bpp, kwargs['byteorder']) + + def generate_frames( ds: "Dataset", reshape: bool = True, rle_segment_order: str = '>' ) -> "np.ndarray": diff --git a/src/lib.rs b/src/lib.rs index a67409b..4bbffdb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -112,17 +112,17 @@ fn _decode_frame( // Pre-define our errors for neatness let err_bits_zero = Err( String::from( - "The (0028,0010) 'Bits Allocated' value must be greater than 0" + "The (0028,0100) 'Bits Allocated' value must be greater than 0" ).into(), ); let err_bits_not_octal = Err( String::from( - "The (0028,0010) 'Bits Allocated' value must be a multiple of 8" + "The (0028,0100) 'Bits Allocated' value must be a multiple of 8" ).into(), ); let err_invalid_bytes = Err( String::from( - "A (0028,0010) 'Bits Allocated' value greater than 64 is not supported" + "A (0028,0100) 'Bits Allocated' value greater than 64 is not supported" ).into() ); let err_invalid_offset = Err( @@ -414,6 +414,7 @@ fn _decode_segment(enc: &[u8], out: &mut Vec) -> Result<(), Box> fn encode_frame<'a>( enc: &[u8], rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char, py: Python<'a> ) -> PyResult<&'a PyBytes> { + // Pre-allocate some space for the encoded data - worst case scenario let mut dst: Vec = Vec::new(); match _encode_frame(enc, &mut dst, rows, cols, spp, bpp, byteorder) { Ok(()) => return Ok(PyBytes::new(py, &dst[..])), @@ -430,7 +431,8 @@ fn _encode_frame( Parameters ---------- src - The data to be RLE encoded. + The data to be RLE encoded, ordered as R1, G1, B1, R2, G2, B2, ..., Rn, Gn, Bn + (i.e. Planar Configuration 0). dst rows @@ -444,24 +446,13 @@ fn _encode_frame( byteorder Required if bpp is greater than 1, '>' if `src` is in big endian byte order, '<' if little endian. - */ let err_invalid_nr_samples = Err( String::from("The (0028,0002) 'Samples per Pixel' must be 1 or 3").into() ); - let err_bits_zero = Err( - String::from( - "The (0028,0010) 'Bits Allocated' value must be greater than 0" - ).into(), - ); - let err_bits_not_octal = Err( - String::from( - "The (0028,0010) 'Bits Allocated' value must be a multiple of 8" - ).into(), - ); - let err_invalid_bytes = Err( + let err_invalid_bits_allocated = Err( String::from( - "A (0028,0010) 'Bits Allocated' value greater than 64 is not supported" + "The (0028,0100) 'Bits Allocated' value must be 8, 16, 32 or 64" ).into() ); let err_invalid_nr_segments = Err( @@ -473,132 +464,86 @@ fn _encode_frame( let err_invalid_parameters = Err( String::from( "The length of the data to be encoded is not consistent with the \ - the values of the dataset's 'Rows', 'Columns', 'Samples per Pixel' \ + the values of the dataset's 'Rows', 'Columns', 'Samples per Pixel' \ and 'Bits Allocated' elements" ).into() ); let err_invalid_byteorder = Err( - String::from("Invalid byte order, must be '>' or '<'").into() + String::from("'byteorder' must be '>' or '<'").into() ); - // Check Samples per Pixel + // Check 'Samples per Pixel' match spp { 1 | 3 => {}, _ => return err_invalid_nr_samples } - // Check Bits Allocated and byte ordering + // Check 'Bits Allocated' match bpp { - 0 => return err_bits_zero, + 0 => return err_invalid_bits_allocated, _ => match bpp % 8 { 0 => {}, - _ => return err_bits_not_octal + _ => return err_invalid_bits_allocated } } - // Ensure `bytes_per_pixel` is in [1, 8] + // Ensure `bytes_per_pixel` is in [1, 2, 4, 8] let bytes_per_pixel: u8 = bpp / 8; - if bytes_per_pixel > 8 { return err_invalid_bytes } - - if bytes_per_pixel > 1 { - match byteorder { + match bytes_per_pixel { + 1 => {}, + 2 | 4 | 8 => match byteorder { '>' | '<' => {}, _ => return err_invalid_byteorder - } + }, + _ => return err_invalid_bits_allocated } - println!( - "src len {}, r {}, c {}, spp {}, bpp {}, bo {}", - src.len(), rows, cols, spp, bpp, byteorder - ); - // Ensure parameters are consistent let r = usize::try_from(rows).unwrap(); let c = usize::try_from(cols).unwrap(); + println!("{}", src.len()); if src.len() != r * c * usize::from(spp * bytes_per_pixel) { return err_invalid_parameters } let nr_segments: u8 = spp * bytes_per_pixel; - println!("Nr segs {}", nr_segments); if nr_segments > 15 { return err_invalid_nr_segments } - let is_big_endian: bool = byteorder == '>'; - println!("is big {}", is_big_endian); - - - // Big endian byte order '>' - // spp 3, bpp 2, 100x100: a = MSB, b = LSB - // R0a-0, R0b-0, G0a-0, G0b-0, B0a-0, B0a-0, ..., B99a-0, B99b-0, R0a-1, ... - - // Little endian byte order '<' - // spp 3, bpp 2, 100x100: a = MSB, b = LSB - // R0b-0, R0a-0, G0b-0, G0a-0, B0b-0, B0a-0, ..., B99b-0, B99a-0, R0b-1, ... - - // Need to be big endian u32s - //let mut header: Vec = vec![0; 16]; + // Reserve 64 bytes of `dst` for the RLE header + // Values in the header are in little endian order dst.extend(u32::from(nr_segments).to_le_bytes().to_vec()); dst.extend([0u8; 60].to_vec()); - println!("header {}", dst.len()); - // Big endian - let mut start = 0; - let process_step = usize::from(spp * bytes_per_pixel); - - let mut start: Vec = (0..usize::from(nr_segments)).collect(); - if !is_big_endian { - // Shuffle from BE to LE ordering - // This is ridiculous! - for ii in 0..usize::from(nr_segments / bytes_per_pixel) { - println!("{} {}", ii, bpp); - match bpp { - 8 => {}, - 16 => start.swap(ii * 2, ii * 2 + 1), - 32 => { - start.swap(ii * 2, ii * 2 + 3); - start.swap(ii * 2 + 1, ii * 2 + 2); - }, - 64 => { - start.swap(ii * 2, ii * 2 + 7); - start.swap(ii * 2 + 1, ii * 2 + 6); - start.swap(ii * 2 + 2, ii * 2 + 5); - start.swap(ii * 2 + 3, ii * 2 + 4); - }, - _ => { return err_invalid_bytes } - } + + // A vector of the start indexes used when segmenting - default big endian + let mut start_indices: Vec = (0..usize::from(nr_segments)).collect(); + if byteorder != '>' { + // `src` has little endian byte ordering + for idx in 0..spp { + let s = usize::from(idx * bytes_per_pixel); + let e = usize::from((idx + 1) * bytes_per_pixel); + start_indices[s..e].reverse(); } } - println!("starts {:?}", start); for idx in 0..usize::from(nr_segments) { - - // Convert to 4 big-endian ordered u8s - let offset = (u32::try_from(dst.len()).unwrap()).to_le_bytes(); - //println!("{:?}", offset); + // Convert to 4x le ordered u8s and update RLE header + let current_offset = (u32::try_from(dst.len()).unwrap()).to_le_bytes(); for ii in idx * 4 + 4..idx * 4 + 8 { // idx = 0: 4, 5, 6, 7 -> 0, 1, 2, 3 // idx = 1: 8, 9, 10, 11 -> 0, 1, 2, 3 - dst[ii] = offset[ii - idx * 4 - 4]; + dst[ii] = current_offset[ii - idx * 4 - 4]; } - println!("idx {}, ps {}, o {:?}", idx, process_step, offset); - // Correlate idx to which bytes to process as a segment - // spp 1, bpp 1 -> nr segments 1 -> process all - // spp 3, bpp 1 -> nr_segments 3 -> process every third then offset by 1 - // spp 1, bpp 2 -> nr_segments 2 -> process every second then offset by 1 - // spp 3, bpp 2 -> nr_segments 6 -> process every sixth then offset by 1 - // spp 3, bpp 4 -> nr_segments 12 -> process every twelfth then offset by 1 - - // Need to reverse order of offset if little endian - // i.e. 0 -> 1 -> 2 -> 3 becomes 3 -> 2 -> 1 -> 0 - - // need to offset src iter start by idx - let segment: Vec = src[start[idx]..].into_iter().step_by(process_step).cloned().collect(); - _encode_segment_b(segment, dst, cols); - //offsets.push(dst.len()); + // Note the offset start of the `src` iter + let segment: Vec = src[start_indices[idx]..] + .into_iter() + .step_by(usize::from(spp * bytes_per_pixel)) + .cloned() + .collect(); + + _encode_segment_from_vector(segment, dst, cols)?; } - println!("{:?}", &dst[..64]); - println!("{:?}", &dst[64..164]); Ok(()) } @@ -607,14 +552,14 @@ fn _encode_frame( #[pyfunction] fn encode_segment<'a>(src: &[u8], cols: u16, py: Python<'a>) -> PyResult<&'a PyBytes> { let mut dst = Vec::new(); - match _encode_segment(src, &mut dst, cols) { + match _encode_segment_from_array(src, &mut dst, cols) { Ok(()) => return Ok(PyBytes::new(py, &dst[..])), Err(err) => return Err(PyValueError::new_err(err.to_string())), } } -fn _encode_segment( +fn _encode_segment_from_array( src: &[u8], dst: &mut Vec, cols: u16 ) -> Result<(), Box> { /* @@ -637,22 +582,23 @@ fn _encode_segment( if src.len() % row_len != 0 { return err_invalid_length } let nr_rows = src.len() / row_len; - let mut offset: usize = 0; + let mut offset: usize; for row_idx in 0..nr_rows { offset = row_idx * row_len; - _encode_row(&src[offset..offset + row_len], dst); + _encode_row(&src[offset..offset + row_len], dst)?; } - // Each segment must be even length or padded to even length with noop byte + // Each segment must be even length or padded to even length with zero if dst.len() % 2 != 0 { - dst.push(128); + dst.push(0); } Ok(()) } -fn _encode_segment_b( + +fn _encode_segment_from_vector( src: Vec, dst: &mut Vec, cols: u16 ) -> Result<(), Box> { /* @@ -675,16 +621,16 @@ fn _encode_segment_b( if src.len() % row_len != 0 { return err_invalid_length } let nr_rows = src.len() / row_len; - let mut offset: usize = 0; + let mut offset: usize; for row_idx in 0..nr_rows { offset = row_idx * row_len; - _encode_row(&src[offset..offset + row_len], dst); + _encode_row(&src[offset..offset + row_len], dst)?; } - // Each segment must be even length or padded to even length with noop byte + // Each segment must be even length or padded to even length with zero if dst.len() % 2 != 0 { - dst.push(128); + dst.push(0); } Ok(()) @@ -755,8 +701,8 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { } // Maximum length of a literal/replicate run - let MAX_RUN: u8 = 128; - let MAX_SRC = src.len(); + let max_run_length: u8 = 128; + let src_length = src.len(); // `replicate` and `literal` are the length of the current run let mut literal: u8 = 0; @@ -775,7 +721,7 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { //println!( // "Start of loop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", - // ii, MAX_SRC, previous, current, literal, replicate + // ii, src_length, previous, current, literal, replicate //); // Run type switching/control @@ -802,7 +748,7 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // Write out replicate run and reset // `replicate` must be at least 2 to avoid overflow dst.push(257u8.wrapping_sub(replicate)); - // Note use of previous item + // Note use of `previous` item dst.push(previous); replicate = 0; } @@ -810,15 +756,15 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { } // If the run length is maxed, write out and reset - if replicate == MAX_RUN { // Should be more frequent + if replicate == max_run_length { // Should be more frequent // Write out replicate run and reset dst.push(129); dst.push(previous); replicate = 0; - } else if literal == MAX_RUN { + } else if literal == max_run_length { // Write out literal run and reset dst.push(127); - // The indexing is different here because ii is the current item, not previous + // The indexing is different here because ii is the `current` item, not previous dst.extend(&src[ii + 1 - usize::from(literal)..ii + 1]); literal = 0; } // 128 is noop! @@ -826,7 +772,7 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { previous = current; ii += 1; - if ii == MAX_SRC { break; } + if ii == src_length { break; } } // Handle cases where the last byte is continuation of a replicate run @@ -845,7 +791,7 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { // Write out and return // `literal` must be at least 1 or we undeflow dst.push(literal - 1u8); - dst.extend(&src[MAX_SRC - usize::from(literal)..]); + dst.extend(&src[src_length - usize::from(literal)..]); } Ok(()) From bbd8e531df81f2341f90af067ec965be6d91e60d Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sun, 25 Apr 2021 19:15:02 +1000 Subject: [PATCH 14/19] Add utils tests --- rle/benchmarks/bench_encode.py | 61 ++++++++++++++++++++ rle/tests/test_utils.py | 101 +++++++++++++++++++++++++++++++++ rle/utils.py | 16 ++++-- 3 files changed, 173 insertions(+), 5 deletions(-) create mode 100644 rle/benchmarks/bench_encode.py create mode 100644 rle/tests/test_utils.py diff --git a/rle/benchmarks/bench_encode.py b/rle/benchmarks/bench_encode.py new file mode 100644 index 0000000..60468d2 --- /dev/null +++ b/rle/benchmarks/bench_encode.py @@ -0,0 +1,61 @@ + +import asv +import timeit + +from pydicom import dcmread +from pydicom.data import get_testdata_file +from pydicom.encaps import generate_pixel_data_frame +from pydicom.pixel_data_handlers.rle_handler import ( + get_pixeldata, _rle_decode_frame, _rle_encode_row, rle_encode_frame +) +from pydicom.pixel_data_handlers.util import reshape_pixel_array +from pydicom.uid import RLELossless + +from ljdata import get_indexed_datasets +from rle.utils import pixel_array, decode_frame +from rle._rle import encode_row, encode_frame + +INDEX = get_indexed_datasets(RLELossless) + + +u8_1s_1f_rle = INDEX["OBXXXX1A_rle.dcm"]['ds'] +u8_1s_1f = dcmread(get_testdata_file("OBXXXX1A.dcm")) +u8_1s_2f_rle = INDEX["OBXXXX1A_rle_2frame.dcm"]['ds'] + + +class TimeEncodeRow: + def setup(self): + self.no_runs = 100000 + ds = u8_1s_1f_rle + arr = ds.pixel_array + self.row_arr = arr[0, :].ravel() + self.row = self.row_arr.tobytes() + + def time_rle(self): + for _ in range(self.no_runs): + encode_row(self.row) + + def time_default(self): + for _ in range(self.no_runs): + _rle_encode_row(self.row_arr) + +class TimeEncodeFrame: + def setup(self): + self.arr_u8_1s_1f = u8_1s_1f.pixel_array + self.bytes_u8_1s_1f = u8_1s_1f.PixelData + self.parms = ( + u8_1s_1f.Rows, + u8_1s_1f.Columns, + u8_1s_1f.SamplesPerPixel, + u8_1s_1f.BitsAllocated, + '<' + ) + self.no_runs = 100 + + def time_default(self): + for _ in range(self.no_runs): + rle_encode_frame(self.arr_u8_1s_1f) + + def time_rle(self): + for _ in range(self.no_runs): + encode_frame(self.bytes_u8_1s_1f, *self.parms) diff --git a/rle/tests/test_utils.py b/rle/tests/test_utils.py new file mode 100644 index 0000000..6126d4c --- /dev/null +++ b/rle/tests/test_utils.py @@ -0,0 +1,101 @@ +"""Tests for the utils module.""" + +import numpy as np +import pytest + +try: + from pydicom import dcmread + from pydicom.pixel_data_handlers.rle_handler import _rle_decode_frame + from pydicom.pixel_data_handlers.util import ( + pixel_dtype, reshape_pixel_array + ) + HAVE_PYDICOM = True +except ImportError: + HAVE_PYDICOM = False + +from rle.data import get_indexed_datasets +from rle.utils import encode_pixel_data, encode_array + + +INDEX_LEE = get_indexed_datasets('1.2.840.10008.1.2.1') + + +class TestEncodeArray: + """Tests for utils.encode_array().""" + def into_array(self, out, ds): + dtype = pixel_dtype(ds).newbyteorder('>') + arr = np.frombuffer(out, dtype) + + if ds.SamplesPerPixel == 1: + arr = arr.reshape(ds.Rows, ds.Columns) + else: + # RLE is planar configuration 1 + arr = np.reshape(arr, (ds.SamplesPerPixel, ds.Rows, ds.Columns)) + arr = arr.transpose(1, 2, 0) + + return arr + + def test_u8_1s_1f(self): + """Test encoding 8-bit, 1 sample/px, 1 frame.""" + ds = INDEX_LEE["OBXXXX1A.dcm"]['ds'] + ref = ds.pixel_array + gen = encode_array(ref, **{'ds': ds}) + b = next(gen) + + params = (ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated) + arr = self.into_array(_rle_decode_frame(b, *params), ds) + + assert np.array_equal(ref, arr) + + with pytest.raises(StopIteration): + next(gen) + + def test_u32_3s_2f(self): + """Test encoding 32-bit, 3 sample/px, 2 frame.""" + ds = INDEX_LEE["SC_rgb_32bit_2frame.dcm"]['ds'] + + ref = ds.pixel_array + gen = encode_array(ref, **{'ds': ds}) + b = next(gen) + + params = (ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated) + arr = self.into_array(_rle_decode_frame(b, *params), ds) + + assert np.array_equal(ref[0], arr) + + b = next(gen) + + params = (ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated) + arr = self.into_array(_rle_decode_frame(b, *params), ds) + + assert np.array_equal(ref[1], arr) + + with pytest.raises(StopIteration): + next(gen) + + def test_byteorder(self): + pass + + def test_missing_params(self): + pass + + +class TestEncodePixelData: + """Tests for utils.encode_pixel_data().""" + def test_missing_params(self): + pass + + def test_byteorder(self): + pass + + def test_bad_samples_raises(self): + pass + + def test_bad_bits_allocated_raises(self): + pass + + def test_bad_length_raises(self): + pass + + def test_too_many_segments_raises(self): + pass diff --git a/rle/utils.py b/rle/utils.py index c8f8f78..ce32c0d 100644 --- a/rle/utils.py +++ b/rle/utils.py @@ -51,7 +51,7 @@ def encode_array(arr: "np.ndarray", **kwargs) -> Generator[bytes, None, None]: * ``{'ds': pydicom.dataset.Dataset}``, which is the corresponding dataset, or * ``{'rows': int, 'columns': int, samples_per_px': int, - 'bits_per_px': int}``. + 'bits_per_px': int, 'nr_frames': int}``. Yields ------ @@ -64,11 +64,16 @@ def encode_array(arr: "np.ndarray", **kwargs) -> Generator[bytes, None, None]: kwargs['byteorder'] = byteorder - if getattr(ds, "NumberOfFrames", 1) > 1: + if 'ds' in kwargs: + nr_frames = getattr(kwargs['ds'], "NumberOfFrames", 1) + else: + nr_frames = kwargs['nr_frames'] + + if nr_frames > 1: for frame in arr: - yield encode_pixel_data(frame.tobytes(), kwargs) + yield encode_pixel_data(frame.tobytes(), **kwargs) else: - yield encode_pixel_data(arr.tobytes(), kwargs) + yield encode_pixel_data(arr.tobytes(), **kwargs) def encode_pixel_data(stream: bytes, **kwargs) -> bytes: @@ -106,6 +111,7 @@ def encode_pixel_data(stream: bytes, **kwargs) -> bytes: The RLE encoded frame. """ if 'ds' in kwargs: + ds = kwargs['ds'] r, c = ds.Rows, ds.Columns bpp = ds.BitsAllocated spp = ds.SamplesPerPixel @@ -121,7 +127,7 @@ def encode_pixel_data(stream: bytes, **kwargs) -> bytes: if spp not in [1, 3]: raise ValueError("'Samples per Pixel' must be 1 or 3") - if bpp * spp > 15: + if bpp / 8 * spp > 15: raise ValueError( "Unable to encode the data as the DICOM Standard blah blah" ) From e4cfc0b22bf5571cb0fb5755c945a298af270cbe Mon Sep 17 00:00:00 2001 From: scaramallion Date: Sun, 25 Apr 2021 19:39:01 +1000 Subject: [PATCH 15/19] Update benchmarks, README, add release notes --- README.md | 44 +++++++---- docs/release_notes/v1.1.0.rst | 11 +++ rle/benchmarks/bench_encode.py | 137 ++++++++++++++++++++++++++++----- src/lib.rs | 1 - 4 files changed, 155 insertions(+), 38 deletions(-) create mode 100644 docs/release_notes/v1.1.0.rst diff --git a/README.md b/README.md index 060b442..caf04b8 100644 --- a/README.md +++ b/README.md @@ -49,22 +49,16 @@ Time per 1000 decodes, pydicom's NumPy RLE handler vs. pylibjpeg-rle #### Encoding -Time per 1000 encodes +Time per 1000 encodes, pydicom's NumPy RLE handler vs. pylibjpeg-rle -| Dataset | Pixels | Bytes | NumPy | pylibjpeg-rle | -| --- | --- | --- | --- | --- | -| OBXXXX1A_rle.dcm | 480,000 | 480,000 | s | s | -| OBXXXX1A_rle_2frame.dcm | 960,000 | 960,000 | s | s | -| SC_rgb_rle.dcm | 10,000 | 30,000 | s | s | -| SC_rgb_rle_2frame.dcm | 20,000 | 60,000 | s | s | -| MR_small_RLE.dcm | 4,096 | 8,192 | s | s | -| emri_small_RLE.dcm | 40,960 | 81,920 | s | s | -| SC_rgb_rle_16bit.dcm | 10,000 | 60,000 | s | s | -| SC_rgb_rle_16bit_2frame.dcm | 20,000 | 120,000 | s | s | -| rtdose_rle_1frame.dcm | 100 | 400 | s | s | -| rtdose_rle.dcm | 1,500 | 6,000 | s | s | -| SC_rgb_rle_32bit.dcm | 10,000 | 120,000 | s | s | -| SC_rgb_rle_32bit_2frame.dcm | 20,000 | 240,000 | s | s | +| Dataset | Pixels | Bytes | NumPy | pylibjpeg-rle | +| --- | --- | --- | --- | --- | +| OBXXXX1A.dcm | 480,000 | 480,000 | 30.7 s | 1.36 s | +| SC_rgb.dcm | 10,000 | 30,000 | 1.80 s | 0.09 s | +| MR_small.dcm | 4,096 | 8,192 | 2.29 s | 0.04 s | +| SC_rgb_16bit.dcm | 10,000 | 60,000 | 3.57 s | 0.17 s | +| rtdose_1frame.dcm | 100 | 400 | 0.19 s | 0.003 s | +| SC_rgb_32bit.dcm | 10,000 | 120,000 | 7.20 s | 0.33 s | ### Usage #### Decoding @@ -96,5 +90,23 @@ for arr in generate_frames(ds): ``` #### Encoding -##### With pylibjpeg ##### Standalone with pydicom + +```python +from pydicom import dcmread +from pydicom.data import get_testdata_file +from pydicom.encaps import encapsulate +from pydicom.uid import RLELossless + +from rle import encode_array + +# Get an uncompressed numpy ndarray +ds = dcmread(get_testdata_file("OBXXXX1A.dcm")) +arr = ds.pixel_array + +# This is a bit silly -> switch ds in, kwargs pure optional +# `encode_array` is a generator function that yields encoded frames +ds.PixelData = encapsulate([enc for enc in encode_array(arr, **{'ds': ds})]) +ds.file_meta.TransferSyntaxUID = RLELossless + +``` diff --git a/docs/release_notes/v1.1.0.rst b/docs/release_notes/v1.1.0.rst new file mode 100644 index 0000000..78c0abd --- /dev/null +++ b/docs/release_notes/v1.1.0.rst @@ -0,0 +1,11 @@ +.. _v1.1.0: + +1.1.0 +===== + +Enhancements +............ + +* Added support for *RLE Lossless* encoding of *Pixel Data* +* Added :func:`~rle.utils.encode_array` generator for standalone encoding +* Added :func:`~rle.utils.encode_pixel_data` entry point for encoding diff --git a/rle/benchmarks/bench_encode.py b/rle/benchmarks/bench_encode.py index 60468d2..f02d7df 100644 --- a/rle/benchmarks/bench_encode.py +++ b/rle/benchmarks/bench_encode.py @@ -16,11 +16,18 @@ from rle._rle import encode_row, encode_frame INDEX = get_indexed_datasets(RLELossless) - - -u8_1s_1f_rle = INDEX["OBXXXX1A_rle.dcm"]['ds'] -u8_1s_1f = dcmread(get_testdata_file("OBXXXX1A.dcm")) -u8_1s_2f_rle = INDEX["OBXXXX1A_rle_2frame.dcm"]['ds'] +# 8/8-bit, 1 sample/pixel, 1 frame +EXPL_8_1_1F = get_testdata_file("OBXXXX1A.dcm") +# 8/8-bit, 3 sample/pixel, 1 frame +EXPL_8_3_1F = get_testdata_file("SC_rgb.dcm") +# 16/16-bit, 1 sample/pixel, 1 frame +EXPL_16_1_1F = get_testdata_file("MR_small.dcm") +# 16/16-bit, 3 sample/pixel, 1 frame +EXPL_16_3_1F = get_testdata_file("SC_rgb_16bit.dcm") +# 32/32-bit, 1 sample/pixel, 1 frame +EXPL_32_1_1F = get_testdata_file("rtdose_1frame.dcm") +# 32/32-bit, 3 sample/pixel, 1 frame +EXPL_32_3_1F = get_testdata_file("SC_rgb_32bit.dcm") class TimeEncodeRow: @@ -39,23 +46,111 @@ def time_default(self): for _ in range(self.no_runs): _rle_encode_row(self.row_arr) -class TimeEncodeFrame: + +class TimePYDEncodeFrame: + """Time tests for rle_handler.rle_encode_frame.""" + def setup(self): + ds = dcmread(EXPL_8_1_1F) + self.arr8_1 = ds.pixel_array + ds = dcmread(EXPL_8_3_1F) + self.arr8_3 = ds.pixel_array + ds = dcmread(EXPL_16_1_1F) + self.arr16_1 = ds.pixel_array + ds = dcmread(EXPL_16_3_1F) + self.arr16_3 = ds.pixel_array + ds = dcmread(EXPL_32_1_1F) + self.arr32_1 = ds.pixel_array + ds = dcmread(EXPL_32_3_1F) + self.arr32_3 = ds.pixel_array + + self.no_runs = 1000 + + def time_08_1(self): + """Time encoding 8 bit 1 sample/pixel.""" + for ii in range(self.no_runs): + rle_encode_frame(self.arr8_1) + + def time_08_3(self): + """Time encoding 8 bit 3 sample/pixel.""" + for ii in range(self.no_runs): + rle_encode_frame(self.arr8_3) + + def time_16_1(self): + """Time encoding 16 bit 1 sample/pixel.""" + for ii in range(self.no_runs): + rle_encode_frame(self.arr16_1) + + def time_16_3(self): + """Time encoding 16 bit 3 sample/pixel.""" + for ii in range(self.no_runs): + rle_encode_frame(self.arr16_3) + + def time_32_1(self): + """Time encoding 32 bit 1 sample/pixel.""" + for ii in range(self.no_runs): + rle_encode_frame(self.arr32_1) + + def time_32_3(self): + """Time encoding 32 bit 3 sample/pixel.""" + for ii in range(self.no_runs): + rle_encode_frame(self.arr32_3) + + +class TimeRLEEncodeFrame: def setup(self): - self.arr_u8_1s_1f = u8_1s_1f.pixel_array - self.bytes_u8_1s_1f = u8_1s_1f.PixelData - self.parms = ( - u8_1s_1f.Rows, - u8_1s_1f.Columns, - u8_1s_1f.SamplesPerPixel, - u8_1s_1f.BitsAllocated, - '<' + ds = dcmread(EXPL_8_1_1F) + self.ds8_1 = ( + ds.PixelData, ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated, + ) + ds = dcmread(EXPL_8_3_1F) + self.ds8_3 = ( + ds.PixelData, ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated, + ) + ds = dcmread(EXPL_16_1_1F) + self.ds16_1 = ( + ds.PixelData, ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated, + ) + ds = dcmread(EXPL_16_3_1F) + self.ds16_3 = ( + ds.PixelData, ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated, + ) + ds = dcmread(EXPL_32_1_1F) + self.ds32_1 = ( + ds.PixelData, ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated, + ) + ds = dcmread(EXPL_32_3_1F) + self.ds32_3 = ( + ds.PixelData, ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated, ) - self.no_runs = 100 - def time_default(self): - for _ in range(self.no_runs): - rle_encode_frame(self.arr_u8_1s_1f) + self.no_runs = 1000 - def time_rle(self): - for _ in range(self.no_runs): - encode_frame(self.bytes_u8_1s_1f, *self.parms) + def time_08_1(self): + """Time encoding 8 bit 1 sample/pixel.""" + for ii in range(self.no_runs): + encode_frame(*self.ds8_1, '<') + + def time_08_3(self): + """Time encoding 8 bit 3 sample/pixel.""" + for ii in range(self.no_runs): + encode_frame(*self.ds8_3, '<') + + def time_16_1(self): + """Time encoding 16 bit 1 sample/pixel.""" + for ii in range(self.no_runs): + encode_frame(*self.ds16_1, '<') + + def time_16_3(self): + """Time encoding 16 bit 3 sample/pixel.""" + for ii in range(self.no_runs): + encode_frame(*self.ds16_3, '<') + + def time_32_1(self): + """Time encoding 32 bit 1 sample/pixel.""" + for ii in range(self.no_runs): + encode_frame(*self.ds32_1, '<') + + def time_32_3(self): + """Time encoding 32 bit 3 sample/pixel.""" + for ii in range(self.no_runs): + encode_frame(*self.ds32_3, '<') diff --git a/src/lib.rs b/src/lib.rs index 4bbffdb..dcdd5e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -502,7 +502,6 @@ fn _encode_frame( let r = usize::try_from(rows).unwrap(); let c = usize::try_from(cols).unwrap(); - println!("{}", src.len()); if src.len() != r * c * usize::from(spp * bytes_per_pixel) { return err_invalid_parameters } From d3f27155b1b7f71aaa5994a2aceecc7f33c49bc6 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Mon, 26 Apr 2021 10:42:49 +1000 Subject: [PATCH 16/19] Add option for byteorder when decoding, default to little-endian decoding --- docs/release_notes/v1.1.0.rst | 9 + rle/tests/test_decode.py | 121 ++++++++---- rle/utils.py | 33 ++-- src/lib.rs | 352 ++++++++++++++++++---------------- 4 files changed, 296 insertions(+), 219 deletions(-) diff --git a/docs/release_notes/v1.1.0.rst b/docs/release_notes/v1.1.0.rst index 78c0abd..67fb2b7 100644 --- a/docs/release_notes/v1.1.0.rst +++ b/docs/release_notes/v1.1.0.rst @@ -9,3 +9,12 @@ Enhancements * Added support for *RLE Lossless* encoding of *Pixel Data* * Added :func:`~rle.utils.encode_array` generator for standalone encoding * Added :func:`~rle.utils.encode_pixel_data` entry point for encoding +* Added the ability to return decoded data in either little or big endian + ordering + +Changes +....... + +* :func:`~rle.utils.pixel_array`, :func:`~rle.utils.generate_frames` and + :func:`~rle.utils.decode_pixel_data` now return little-endian ordered + ndarrays diff --git a/rle/tests/test_decode.py b/rle/tests/test_decode.py index f0063bc..944cd6b 100644 --- a/rle/tests/test_decode.py +++ b/rle/tests/test_decode.py @@ -74,46 +74,45 @@ def as_bytes(self, offsets): def test_bits_allocated_zero_raises(self): """Test exception raised for BitsAllocated 0.""" msg = ( - r"The \(0028,0100\) 'Bits Allocated' value must be greater than 0" + r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64" ) with pytest.raises(ValueError, match=msg): - decode_frame(b'\x00\x00\x00\x00', 1, 0) + decode_frame(b'\x00\x00\x00\x00', 1, 0, '<') def test_bits_allocated_not_octal_raises(self): """Test exception raised for BitsAllocated not a multiple of 8.""" msg = ( - r"The \(0028,0100\) 'Bits Allocated' value must be a multiple of 8" + r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64" ) with pytest.raises(ValueError, match=msg): - decode_frame(b'\x00\x00\x00\x00', 1, 12) + decode_frame(b'\x00\x00\x00\x00', 1, 12, '<') def test_bits_allocated_large_raises(self): """Test exception raised for BitsAllocated greater than 64.""" msg = ( - r"A \(0028,0100\) 'Bits Allocated' value greater than " - r"64 is not supported" + r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64" ) with pytest.raises(ValueError, match=msg): - decode_frame(b'\x00\x00\x00\x00', 1, 72) + decode_frame(b'\x00\x00\x00\x00', 1, 72, '<') def test_insufficient_data_for_header_raises(self): """Test exception raised if insufficient data.""" msg = r"Frame is not long enough to contain RLE encoded data" with pytest.raises(ValueError, match=msg): - decode_frame(b'\x00\x00\x00\x00', 1, 8) + decode_frame(b'\x00\x00\x00\x00', 1, 8, '<') def test_no_data_raises(self): """Test exception raised if no data.""" msg = r"Frame is not long enough to contain RLE encoded data" with pytest.raises(ValueError, match=msg): - decode_frame(b'', 1, 8) + decode_frame(b'', 1, 8, '<') def test_invalid_first_offset_raises(self): """Test exception if invalid first offset.""" msg = r"Invalid segment offset found in the RLE header" d = self.as_bytes([0]) with pytest.raises(ValueError, match=msg): - decode_frame(d, 1, 8) + decode_frame(d, 1, 8, '<') def test_insufficient_data_for_offsets_raises(self): """Test exception if invalid first offset.""" @@ -121,21 +120,21 @@ def test_insufficient_data_for_offsets_raises(self): # Offset 64 with length 64 d = self.as_bytes([64]) with pytest.raises(ValueError, match=msg): - decode_frame(d, 1, 8) + decode_frame(d, 1, 8, '<') def test_non_increasing_offsets_raises(self): """Test exception if offsets not in increasing order.""" msg = r"Invalid segment offset found in the RLE header" d = self.as_bytes([64, 70, 68]) with pytest.raises(ValueError, match=msg): - decode_frame(d, 1, 8) + decode_frame(d, 1, 8, '<') def test_invalid_samples_px_raises(self): """Test exception if samples per px not 1 or 3.""" msg = r"The \(0028,0002\) 'Samples per Pixel' must be 1 or 3" d = self.as_bytes([64, 70]) with pytest.raises(ValueError, match=msg): - decode_frame(d + b'\x00' * 8, 1, 8) + decode_frame(d + b'\x00' * 8, 1, 8, '<') def test_insufficient_frame_literal_raises(self): """Test exception if frame not large enough to hold segment on lit.""" @@ -145,7 +144,7 @@ def test_insufficient_frame_literal_raises(self): ) d = self.as_bytes([64]) with pytest.raises(ValueError, match=msg): - decode_frame(d + b'\x00' * 8, 1, 8) + decode_frame(d + b'\x00' * 8, 1, 8, '<') def test_insufficient_frame_copy_raises(self): """Test exception if frame not large enough to hold segment on copy.""" @@ -155,7 +154,7 @@ def test_insufficient_frame_copy_raises(self): ) d = self.as_bytes([64]) with pytest.raises(ValueError, match=msg): - decode_frame(d + b'\xff\x00\x00', 1, 8) + decode_frame(d + b'\xff\x00\x00', 1, 8, '<') def test_insufficient_segment_copy_raises(self): """Test exception if insufficient segment data on copy.""" @@ -165,7 +164,7 @@ def test_insufficient_segment_copy_raises(self): ) d = self.as_bytes([64]) with pytest.raises(ValueError, match=msg): - decode_frame(d + b'\xff', 8, 8) + decode_frame(d + b'\xff', 8, 8, '<') def test_insufficient_segment_literal_raises(self): """Test exception if insufficient segment data on literal.""" @@ -175,21 +174,39 @@ def test_insufficient_segment_literal_raises(self): ) d = self.as_bytes([64]) with pytest.raises(ValueError, match=msg): - decode_frame(d + b'\x0a' * 8, 12, 8) + decode_frame(d + b'\x0a' * 8, 12, 8, '<') + + def test_invalid_byteorder_raises(self): + """Test exception if invalid byteorder.""" + header = ( + b'\x01\x00\x00\x00' + b'\x40\x00\x00\x00' + ) + header += (64 - len(header)) * b'\x00' + # 2 x 3 data + # 0, 64, 128, 160, 192, 255 + data = b'\x05\x00\x40\x80\xA0\xC0\xFF' + + # Ok with u8 + decode_frame(header + data, 2 * 3, 8, '=') + + msg = r"'byteorder' must be '>' or '<'" + with pytest.raises(ValueError, match=msg): + decode_frame(header + data, 1 * 3, 16, '=') def test_decoded_segment_length_short(self): """Test exception if decoded segment length invalid.""" msg = r"The decoded segment length does not match the expected length" d = self.as_bytes([64]) with pytest.raises(ValueError, match=msg): - decode_frame(d + b'\x00' * 8, 12, 8) + decode_frame(d + b'\x00' * 8, 12, 8, '<') def test_decoded_segment_length_long(self): """Test exception if decoded segment length invalid.""" msg = r"The decoded segment length does not match the expected length" d = self.as_bytes([64, 72]) with pytest.raises(ValueError, match=msg): - decode_frame(d + b'\x00' * 20, 8, 16) + decode_frame(d + b'\x00' * 20, 8, 16, '<') def test_u8_1s(self): """Test decoding 8-bit, 1 sample/pixel.""" @@ -201,8 +218,14 @@ def test_u8_1s(self): # 2 x 3 data # 0, 64, 128, 160, 192, 255 data = b'\x05\x00\x40\x80\xA0\xC0\xFF' - decoded = decode_frame(header + data, 2 * 3, 8) - arr = np.frombuffer(decoded, np.dtype('>u1')) + # Big endian + decoded = decode_frame(header + data, 2 * 3, 8, '>') + arr = np.frombuffer(decoded, np.dtype('uint8')) + assert [0, 64, 128, 160, 192, 255] == arr.tolist() + + # Little-endian + decoded = decode_frame(header + data, 2 * 3, 8, '<') + arr = np.frombuffer(decoded, np.dtype('uint8')) assert [0, 64, 128, 160, 192, 255] == arr.tolist() def test_u8_3s(self): @@ -221,8 +244,8 @@ def test_u8_3s(self): b'\x05\xFF\xC0\x80\x40\x00\xFF' # B b'\x05\x01\x40\x80\xA0\xC0\xFE' # G ) - decoded = decode_frame(header + data, 2 * 3, 8) - arr = np.frombuffer(decoded, np.dtype('>u1')) + decoded = decode_frame(header + data, 2 * 3, 8, '<') + arr = np.frombuffer(decoded, np.dtype('uint8')) # Ordered all R, all G, all B assert [0, 64, 128, 160, 192, 255] == arr[:6].tolist() assert [255, 192, 128, 64, 0, 255] == arr[6:12].tolist() @@ -242,10 +265,16 @@ def test_u16_1s(self): b'\x05\x00\x00\x01\x00\xFF\xFF' # MSB b'\x05\x00\x01\x00\xFF\x00\xFF' # LSB ) - decoded = decode_frame(header + data, 2 * 3, 16) + # Big-endian output + decoded = decode_frame(header + data, 2 * 3, 16, '>') arr = np.frombuffer(decoded, np.dtype('>u2')) assert [0, 1, 256, 255, 65280, 65535] == arr.tolist() + # Little-endian output + decoded = decode_frame(header + data, 2 * 3, 16, '<') + arr = np.frombuffer(decoded, np.dtype('') arr = np.frombuffer(decoded, np.dtype('>u2')) assert [0, 1, 256, 255, 65280, 65535] == arr[:6].tolist() assert [65535, 1, 256, 255, 65280, 0] == arr[6:12].tolist() assert [1, 1, 256, 255, 65280, 65534] == arr[12:].tolist() + # Little-endian output + decoded = decode_frame(header + data, 2 * 3, 16, '<') + arr = np.frombuffer(decoded, np.dtype('') arr = np.frombuffer(decoded, np.dtype('>u4')) assert [0, 16777216, 65536, 256, 1, 4294967295] == arr.tolist() + # Little-endian output + decoded = decode_frame(header + data, 2 * 3, 32, '<') + arr = np.frombuffer(decoded, np.dtype('') arr = np.frombuffer(decoded, np.dtype('>u4')) assert [0, 16777216, 65536, 256, 1, 4294967295] == arr[:6].tolist() assert [4294967295, 16777216, 65536, 256, 1, 0] == arr[6:12].tolist() assert [1, 16777216, 65536, 256, 1, 4294967294] == arr[12:].tolist() + # Little-endian output + decoded = decode_frame(header + data, 2 * 3, 32, '<') + arr = np.frombuffer(decoded, np.dtype('i2' == arr.dtype + assert 'u2' == arr.dtype + assert 'u2' == arr.dtype + assert 'u2' == arr.dtype + assert 'u4' == arr.dtype + assert 'u4' == arr.dtype + assert 'u4' == arr.dtype + assert 'u4' == arr.dtype + assert ' "np.ndarray": ------- numpy.ndarray A 1D array of ``numpy.uint8`` containing the decoded frame data, - with big-endian encoding and planar configuration 1. + with little-endian encoding and planar configuration 1. Raises ------ @@ -33,7 +33,7 @@ def decode_pixel_data(stream: bytes, ds: "Dataset") -> "np.ndarray": If the decoding failed. """ return np.frombuffer( - decode_frame(stream, ds.Rows * ds.Columns, ds.BitsAllocated), + decode_frame(stream, ds.Rows * ds.Columns, ds.BitsAllocated, '<'), dtype='uint8' ) @@ -140,9 +140,7 @@ def encode_pixel_data(stream: bytes, **kwargs) -> bytes: return encode_frame(stream, r, c, spp, bpp, kwargs['byteorder']) -def generate_frames( - ds: "Dataset", reshape: bool = True, rle_segment_order: str = '>' -) -> "np.ndarray": +def generate_frames(ds: "Dataset", reshape: bool = True) -> "np.ndarray": """Yield a *Pixel Data* frame from `ds` as an :class:`~numpy.ndarray`. Parameters @@ -155,19 +153,11 @@ def generate_frames( If ``True`` (default), then the returned :class:`~numpy.ndarray` will be reshaped to the correct dimensions. If ``False`` then no reshaping will be performed. - rle_segment_order : str - The order of segments used by the RLE decoder when dealing with *Bits - Allocated* > 8. Each RLE segment contains 8-bits of the pixel data, - and segments are supposed to be ordered from MSB to LSB. A value of - ``'>'`` means interpret the segments as being in big endian order - (default) while a value of ``'<'`` means interpret the segments as - being in little endian order which may be possible if the encoded data - is non-conformant. Yields ------- numpy.ndarray - A single frame of (7FE0,0010) *Pixel Data* as an + A single frame of (7FE0,0010) *Pixel Data* as a little-endian ordered :class:`~numpy.ndarray` with an appropriate dtype for the data. Raises @@ -204,9 +194,9 @@ def generate_frames( r, c = ds.Rows, ds.Columns bpp = ds.BitsAllocated - dtype = pixel_dtype(ds).newbyteorder(rle_segment_order) + dtype = pixel_dtype(ds) for frame in generate_pixel_data_frame(ds.PixelData, nr_frames): - arr = np.frombuffer(decode_frame(frame, r * c, bpp), dtype=dtype) + arr = np.frombuffer(decode_frame(frame, r * c, bpp, '<'), dtype=dtype) if not reshape: yield arr @@ -233,10 +223,10 @@ def pixel_array(ds: "Dataset") -> "np.ndarray": Returns ------- numpy.ndarray - The contents of (7FE0,0010) *Pixel Data* as an :class:`~numpy.ndarray` - with shape (rows, columns), (rows, columns, components), (frames, - rows, columns), or (frames, rows, columns, components) depending on - the dataset. + The contents of (7FE0,0010) *Pixel Data* as a little-endian ordered + :class:`~numpy.ndarray` with shape (rows, columns), (rows, columns, + components), (frames, rows, columns), or (frames, rows, columns, + components) depending on the dataset. """ from pydicom.pixel_data_handlers.util import ( get_expected_length, reshape_pixel_array, pixel_dtype @@ -245,8 +235,7 @@ def pixel_array(ds: "Dataset") -> "np.ndarray": expected_len = get_expected_length(ds, 'pixels') frame_len = expected_len // getattr(ds, "NumberOfFrames", 1) # Empty destination array for our decoded pixel data - dtype = pixel_dtype(ds).newbyteorder('>') - arr = np.empty(expected_len, dtype) + arr = np.empty(expected_len, pixel_dtype(ds)) generate_offsets = range(0, expected_len, frame_len) for frame, offset in zip(generate_frames(ds, False), generate_offsets): diff --git a/src/lib.rs b/src/lib.rs index dcdd5e4..d6e099f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,17 +8,31 @@ use pyo3::types::{PyBytes, PyByteArray}; use pyo3::exceptions::{PyValueError}; -//use rayon::prelude::*; +// Python _rle module members +#[pymodule] +fn _rle(_: Python, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(parse_header, m)?).unwrap(); + m.add_function(wrap_pyfunction!(decode_segment, m)?).unwrap(); + m.add_function(wrap_pyfunction!(decode_frame, m)?).unwrap(); + + m.add_function(wrap_pyfunction!(encode_row, m)?).unwrap(); + m.add_function(wrap_pyfunction!(encode_segment, m)?).unwrap(); + m.add_function(wrap_pyfunction!(encode_frame, m)?).unwrap(); + + Ok(()) +} + // RLE Decoding +// ------------ #[pyfunction] -fn parse_header(enc: &[u8]) -> PyResult> { +fn parse_header(src: &[u8]) -> PyResult> { /* Return the segment offsets from the RLE header. Parameters ---------- - b : bytes + src : bytes The 64 byte RLE header. Returns @@ -26,11 +40,11 @@ fn parse_header(enc: &[u8]) -> PyResult> { List[int] All 15 segment offsets found in the header. */ - if enc.len() != 64 { + if src.len() != 64 { return Err(PyValueError::new_err("The RLE header must be 64 bytes long")) } - let header = <&[u8; 64]>::try_from(&enc[0..64]).unwrap(); + let header = <&[u8; 64]>::try_from(&src[0..64]).unwrap(); let mut offsets: Vec = Vec::new(); offsets.extend(&_parse_header(header)[..]); @@ -38,55 +52,59 @@ fn parse_header(enc: &[u8]) -> PyResult> { } -fn _parse_header(b: &[u8; 64]) -> [u32; 15] { +fn _parse_header(src: &[u8; 64]) -> [u32; 15] { /* Return the segment offsets from the RLE header. Parameters ---------- - b + src The 64 byte RLE header. */ return [ - u32::from_le_bytes([ b[4], b[5], b[6], b[7]]), - u32::from_le_bytes([ b[8], b[9], b[10], b[11]]), - u32::from_le_bytes([b[12], b[13], b[14], b[15]]), - u32::from_le_bytes([b[16], b[17], b[18], b[19]]), - u32::from_le_bytes([b[20], b[21], b[22], b[23]]), - u32::from_le_bytes([b[24], b[25], b[26], b[27]]), - u32::from_le_bytes([b[28], b[29], b[30], b[31]]), - u32::from_le_bytes([b[32], b[33], b[34], b[35]]), - u32::from_le_bytes([b[36], b[37], b[38], b[39]]), - u32::from_le_bytes([b[40], b[41], b[42], b[43]]), - u32::from_le_bytes([b[44], b[45], b[46], b[47]]), - u32::from_le_bytes([b[48], b[49], b[50], b[51]]), - u32::from_le_bytes([b[52], b[53], b[54], b[55]]), - u32::from_le_bytes([b[56], b[57], b[58], b[59]]), - u32::from_le_bytes([b[60], b[61], b[62], b[63]]) + u32::from_le_bytes([ src[4], src[5], src[6], src[7]]), + u32::from_le_bytes([ src[8], src[9], src[10], src[11]]), + u32::from_le_bytes([src[12], src[13], src[14], src[15]]), + u32::from_le_bytes([src[16], src[17], src[18], src[19]]), + u32::from_le_bytes([src[20], src[21], src[22], src[23]]), + u32::from_le_bytes([src[24], src[25], src[26], src[27]]), + u32::from_le_bytes([src[28], src[29], src[30], src[31]]), + u32::from_le_bytes([src[32], src[33], src[34], src[35]]), + u32::from_le_bytes([src[36], src[37], src[38], src[39]]), + u32::from_le_bytes([src[40], src[41], src[42], src[43]]), + u32::from_le_bytes([src[44], src[45], src[46], src[47]]), + u32::from_le_bytes([src[48], src[49], src[50], src[51]]), + u32::from_le_bytes([src[52], src[53], src[54], src[55]]), + u32::from_le_bytes([src[56], src[57], src[58], src[59]]), + u32::from_le_bytes([src[60], src[61], src[62], src[63]]) ] } #[pyfunction] fn decode_frame<'a>( - enc: &[u8], px_per_sample: u32, bits_per_px: u8, py: Python<'a> + src: &[u8], nr_pixels: u32, bpp: u8, byteorder: char, py: Python<'a> ) -> PyResult<&'a PyByteArray> { /* Return the decoded frame. Parameters ---------- - enc : bytes + src : bytes The RLE encoded frame. - px_per_sample : int - The number of pixels per sample (rows x columns). - bits_per_px : int - The number of bits per pixel, should be a multiple of 8. + nr_pixels : int + The total number of pixels in the frame (rows x columns), + maximum (2^32 - 1). + bpp : int + The number of bits per pixel, supported values 8, 16, 32, 64. + byteorder : str + The byte order of the returned data, '<' for little endian, '>' for + big endian. Returns ------- bytearray The decoded frame. */ - match _decode_frame(enc, px_per_sample, bits_per_px) { + match _decode_frame(src, nr_pixels, bpp, byteorder) { Ok(frame) => return Ok(PyByteArray::new(py, &frame)), Err(err) => return Err(PyValueError::new_err(err.to_string())), } @@ -94,35 +112,28 @@ fn decode_frame<'a>( fn _decode_frame( - enc: &[u8], px_per_sample: u32, bits_per_px: u8 + src: &[u8], nr_pixels: u32, bpp: u8, byteorder: char ) -> Result, Box> { /* Return the decoded frame. Parameters ---------- - enc + src The RLE encoded frame. - px_per_sample - The number of pixels per sample (rows x columns), maximum (2^32 - 1). - bits_per_px + nr_pixels + The total number of pixels in the frame (rows x columns). + bpp The number of bits per pixel, should be a multiple of 8 and no larger than 64. + byteorder + The byte order of the decoded data, '<' for little endian, '>' for + big endian. */ // Pre-define our errors for neatness - let err_bits_zero = Err( - String::from( - "The (0028,0100) 'Bits Allocated' value must be greater than 0" - ).into(), - ); - let err_bits_not_octal = Err( - String::from( - "The (0028,0100) 'Bits Allocated' value must be a multiple of 8" - ).into(), - ); - let err_invalid_bytes = Err( + let err_invalid_bits_allocated = Err( String::from( - "A (0028,0100) 'Bits Allocated' value greater than 64 is not supported" + "The (0028,0100) 'Bits Allocated' value must be 8, 16, 32 or 64" ).into() ); let err_invalid_offset = Err( @@ -139,27 +150,37 @@ fn _decode_frame( "The decoded segment length does not match the expected length" ).into() ); + let err_invalid_byteorder = Err( + String::from("'byteorder' must be '>' or '<'").into() + ); // Ensure we have a valid bits/px value - match bits_per_px { - 0 => return err_bits_zero, - _ => match bits_per_px % 8 { + match bpp { + 0 => return err_invalid_bits_allocated, + _ => match bpp % 8 { 0 => {}, - _ => return err_bits_not_octal + _ => return err_invalid_bits_allocated } } - // Ensure `bytes_per_pixel` is in [1, 8] - let bytes_per_pixel: u8 = bits_per_px / 8; - if bytes_per_pixel > 8 { return err_invalid_bytes } + // Ensure `bytes_per_pixel` is in [1, 2, 4, 8] + let bytes_per_pixel: u8 = bpp / 8; + match bytes_per_pixel { + 1 => {}, + 2 | 4 | 8 => match byteorder { + '>' | '<' => {}, + _ => return err_invalid_byteorder + }, + _ => return err_invalid_bits_allocated + } // Parse the RLE header and check results // -------------------------------------- // Ensure we have at least enough data for the RLE header - let encoded_length = enc.len(); + let encoded_length = src.len(); if encoded_length < 64 { return err_insufficient_data } - let header = <&[u8; 64]>::try_from(&enc[0..64]).unwrap(); + let header = <&[u8; 64]>::try_from(&src[0..64]).unwrap(); let all_offsets: [u32; 15] = _parse_header(header); // First offset must always be 64 @@ -186,15 +207,15 @@ fn _decode_frame( } // Check the samples per pixel is conformant - let samples_per_px: u8 = nr_segments / bytes_per_pixel; - match samples_per_px { + let spp: u8 = nr_segments / bytes_per_pixel; + match spp { 1 | 3 => {}, _ => return err_invalid_nr_samples } // Watch for overflow here; u32 * u32 -> u64 let expected_length = usize::try_from( - px_per_sample * u32::from(bytes_per_pixel * samples_per_px) + nr_pixels * u32::from(bytes_per_pixel * spp) ).unwrap(); // Pre-allocate a vector for the decoded frame @@ -219,16 +240,25 @@ fn _decode_frame( // Decode each segment and place it into the vector // ------------------------------------------------ - let pps = usize::try_from(px_per_sample).unwrap(); + let pps = usize::try_from(nr_pixels).unwrap(); // Concatenate sample planes into a frame - for sample in 0..samples_per_px { // 0 or (0, 1, 2) + for sample in 0..spp { // 0 or (0, 1, 2) // Sample offset let so = usize::from(sample * bytes_per_pixel) * pps; // Interleave the segments into a sample plane for byte_offset in 0..bytes_per_pixel { // 0, [1, 2, 3, ..., 7] - // idx should be in range [0, 23], but max is 15 - let idx = usize::from(sample * bytes_per_pixel + byte_offset); + // idx should be in range [0, 15] + let idx: usize; + if byteorder == '>' { // big-endian + // e.g. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + idx = usize::from(sample * bytes_per_pixel + byte_offset); + } else { // little-endian + // e.g. 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 + idx = usize::from( + bytes_per_pixel - byte_offset + bytes_per_pixel * sample + ) - 1; + } // offsets[idx] is u32 -> usize not guaranteed let start = usize::try_from(offsets[idx]).unwrap(); @@ -236,7 +266,7 @@ fn _decode_frame( // Decode the segment into the frame let len = _decode_segment_into_frame( - <&[u8]>::try_from(&enc[start..end]).unwrap(), + <&[u8]>::try_from(&src[start..end]).unwrap(), &mut frame, usize::from(bytes_per_pixel), usize::from(byte_offset) + so @@ -250,15 +280,15 @@ fn _decode_frame( fn _decode_segment_into_frame( - enc: &[u8], frame: &mut Vec, bpp: usize, initial_offset: usize + src: &[u8], dst: &mut Vec, bpp: usize, initial_offset: usize ) -> Result> { /* Decode an RLE segment directly into a frame. Parameters ---------- - enc + src The encoded segment. - frame + dst The Vec for the decoded frame. bpp The number of bytes per pixel. @@ -273,8 +303,8 @@ fn _decode_segment_into_frame( let mut idx = initial_offset; let mut pos = 0; let mut header_byte: usize; - let max_offset = enc.len() - 1; - let max_frame = frame.len(); + let max_offset = src.len() - 1; + let max_frame = dst.len(); let mut op_len: usize; let err_eod = Err( String::from( @@ -292,7 +322,7 @@ fn _decode_segment_into_frame( loop { // `header_byte` is equivalent to N in the DICOM Standard // usize is at least u8 - header_byte = usize::from(enc[pos]); + header_byte = usize::from(src[pos]); pos += 1; if header_byte > 128 { // Extend by copying the next byte (-N + 1) times @@ -308,7 +338,7 @@ fn _decode_segment_into_frame( } for _ in 0..op_len { - frame[idx] = enc[pos]; + dst[idx] = src[pos]; idx += bpp; } pos += 1; @@ -324,7 +354,7 @@ fn _decode_segment_into_frame( } for ii in pos..pos + op_len { - frame[idx] = enc[ii]; + dst[idx] = src[ii]; idx += bpp; } pos += header_byte + 1; @@ -338,12 +368,12 @@ fn _decode_segment_into_frame( #[pyfunction] -fn decode_segment<'a>(enc: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { +fn decode_segment<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { /* Return a decoded RLE segment as bytes. Parameters ---------- - enc : bytes + src : bytes The encoded segment. Returns @@ -351,27 +381,27 @@ fn decode_segment<'a>(enc: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { bytes The decoded segment. */ - let mut segment: Vec = Vec::new(); - match _decode_segment(enc, &mut segment) { - Ok(()) => return Ok(PyBytes::new(py, &segment[..])), + let mut dst: Vec = Vec::new(); + match _decode_segment(src, &mut dst) { + Ok(()) => return Ok(PyBytes::new(py, &dst[..])), Err(err) => return Err(PyValueError::new_err(err.to_string())), } } -fn _decode_segment(enc: &[u8], out: &mut Vec) -> Result<(), Box> { +fn _decode_segment(src: &[u8], dst: &mut Vec) -> Result<(), Box> { /* Decode an RLE segment. Parameters ---------- - enc + src The encoded segment. - out + dst A Vec for the decoded segment. */ let mut pos = 0; let mut header_byte: usize; - let max_offset = enc.len() - 1; + let max_offset = src.len() - 1; let err = Err( String::from( "The end of the data was reached before the segment was \ @@ -382,7 +412,7 @@ fn _decode_segment(enc: &[u8], out: &mut Vec) -> Result<(), Box> loop { // `header_byte` is equivalent to N in the DICOM Standard // usize is at least u8 - header_byte = usize::from(enc[pos]); + header_byte = usize::from(src[pos]); pos += 1; if header_byte > 128 { if pos > max_offset { @@ -391,14 +421,14 @@ fn _decode_segment(enc: &[u8], out: &mut Vec) -> Result<(), Box> // Extend by copying the next byte (-N + 1) times // however since using uint8 instead of int8 this will be // (256 - N + 1) times - out.extend(vec![enc[pos]; 257 - header_byte]); + dst.extend(vec![src[pos]; 257 - header_byte]); pos += 1; } else if header_byte < 128 { if (pos + header_byte) > max_offset { return err } // Extend by literally copying the next (N + 1) bytes - out.extend(&enc[pos..(pos + header_byte + 1)]); + dst.extend(&src[pos..(pos + header_byte + 1)]); pos += header_byte + 1; } // header_byte == 128 is noop @@ -410,13 +440,38 @@ fn _decode_segment(enc: &[u8], out: &mut Vec) -> Result<(), Box> // RLE Encoding +// ------------ + #[pyfunction] fn encode_frame<'a>( - enc: &[u8], rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char, py: Python<'a> + src: &[u8], rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char, py: Python<'a> ) -> PyResult<&'a PyBytes> { - // Pre-allocate some space for the encoded data - worst case scenario + /* Return RLE encoded `src` as bytes. + + Parameters + ---------- + src : bytes + The data to be RLE encoded, ordered as R1, G1, B1, R2, G2, B2, ..., + Rn, Gn, Bn (i.e. Planar Configuration 0). + rows : int + The number of rows in the data. + cols : int + The number of columns in the data. + spp : int + The number of samples per pixel, supported values are 1 or 3. + bpp : int + The number of bits per pixel, supported values are 8, 16, 32 and 64. + byteorder : str + Required if `bpp` is greater than 1, '>' if `src` is in big endian byte + order, '<' if little endian. + + Returns + ------- + bytes + The RLE encoded frame. + */ let mut dst: Vec = Vec::new(); - match _encode_frame(enc, &mut dst, rows, cols, spp, bpp, byteorder) { + match _encode_frame(src, &mut dst, rows, cols, spp, bpp, byteorder) { Ok(()) => return Ok(PyBytes::new(py, &dst[..])), Err(err) => return Err(PyValueError::new_err(err.to_string())), } @@ -431,10 +486,10 @@ fn _encode_frame( Parameters ---------- src - The data to be RLE encoded, ordered as R1, G1, B1, R2, G2, B2, ..., Rn, Gn, Bn - (i.e. Planar Configuration 0). + The data to be RLE encoded, ordered as R1, G1, B1, R2, G2, B2, ..., + Rn, Gn, Bn (i.e. Planar Configuration 0). dst - + The vector storing the encoded data. rows The number of rows in the data. cols @@ -525,16 +580,15 @@ fn _encode_frame( } } + // Encode the data and update the RLE header segment offsets for idx in 0..usize::from(nr_segments) { - // Convert to 4x le ordered u8s and update RLE header + // Update RLE header: convert current offset to 4x le ordered u8s let current_offset = (u32::try_from(dst.len()).unwrap()).to_le_bytes(); for ii in idx * 4 + 4..idx * 4 + 8 { - // idx = 0: 4, 5, 6, 7 -> 0, 1, 2, 3 - // idx = 1: 8, 9, 10, 11 -> 0, 1, 2, 3 dst[ii] = current_offset[ii - idx * 4 - 4]; } - // Note the offset start of the `src` iter + // Encode! Note the offset start of the `src` iter let segment: Vec = src[start_indices[idx]..] .into_iter() .step_by(usize::from(spp * bytes_per_pixel)) @@ -548,20 +602,10 @@ fn _encode_frame( } -#[pyfunction] -fn encode_segment<'a>(src: &[u8], cols: u16, py: Python<'a>) -> PyResult<&'a PyBytes> { - let mut dst = Vec::new(); - match _encode_segment_from_array(src, &mut dst, cols) { - Ok(()) => return Ok(PyBytes::new(py, &dst[..])), - Err(err) => return Err(PyValueError::new_err(err.to_string())), - } -} - - -fn _encode_segment_from_array( - src: &[u8], dst: &mut Vec, cols: u16 +fn _encode_segment_from_vector( + src: Vec, dst: &mut Vec, cols: u16 ) -> Result<(), Box> { - /* + /* RLE encode a segment. Parameters ---------- @@ -572,33 +616,45 @@ fn _encode_segment_from_array( cols The length of each row in the `src`. */ - let err_invalid_length = Err( - String::from("The (0028,0011) 'Columns' value is invalid").into() - ); - let row_len: usize = usize::try_from(cols).unwrap(); - - if src.len() % row_len != 0 { return err_invalid_length } - - let nr_rows = src.len() / row_len; - let mut offset: usize; - - for row_idx in 0..nr_rows { - offset = row_idx * row_len; + for row_idx in 0..(src.len() / row_len) { + let offset = row_idx * row_len; _encode_row(&src[offset..offset + row_len], dst)?; } // Each segment must be even length or padded to even length with zero - if dst.len() % 2 != 0 { - dst.push(0); - } + if dst.len() % 2 != 0 { dst.push(0); } Ok(()) } -fn _encode_segment_from_vector( - src: Vec, dst: &mut Vec, cols: u16 +#[pyfunction] +fn encode_segment<'a>(src: &[u8], cols: u16, py: Python<'a>) -> PyResult<&'a PyBytes> { + /* Return an RLE encoded segment as bytes. + + Parameters + ---------- + src : bytes + The segment data to be encoded. + cols : int + The length of each row in the `src`. + + Returns + ------- + bytes + An RLE encoded segment + */ + let mut dst = Vec::new(); + match _encode_segment_from_array(src, &mut dst, cols) { + Ok(()) => return Ok(PyBytes::new(py, &dst[..])), + Err(err) => return Err(PyValueError::new_err(err.to_string())), + } +} + + +fn _encode_segment_from_array( + src: &[u8], dst: &mut Vec, cols: u16 ) -> Result<(), Box> { /* @@ -638,7 +694,7 @@ fn _encode_segment_from_vector( #[pyfunction] fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { - /* Return RLE encoded data as Python bytes. + /* Return `src` as RLE encoded bytes. Parameters ---------- @@ -650,11 +706,7 @@ fn encode_row<'a>(src: &[u8], py: Python<'a>) -> PyResult<&'a PyBytes> { bytes The RLE encoded data. */ - // Assuming all literal runs, `dst` can never be greater than - // ceil(src.len() / 128) + src.len() - - // Close enough... - let mut dst = Vec::with_capacity(src.len() + src.len() / 128 + 1); + let mut dst = Vec::new(); match _encode_row(src, &mut dst) { Ok(()) => return Ok(PyBytes::new(py, &dst[..])), Err(err) => return Err(PyValueError::new_err(err.to_string())), @@ -675,19 +727,13 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { */ // Reminders: - // * Each image row is encoded separately - // * Literal runs are a non-repetitive stream - // * Replicate runs are a repetitive stream - // * 2 byte repeats are encoded as replicate runs // * Maximum length of literal/replicate runs is 128 bytes - - // Replicate run: dst += [count, value] - // count: number of bytes in the run (i8 = -replicate + 1) - // value: the value of the repeating byte - - // Literal run: dst += [count, a, b, c, ...] - // count: number of bytes in the literal stream (i8 = literal - 1) - // a, b, c, ...: the literal stream + // * Replicate run: dst += [count, value] + // count: number of bytes in the run (i8 = -replicate + 1) + // value: the value of the repeating byte + // * Literal run: dst += [count, a, b, c, ...] + // count: number of bytes in the literal stream (i8 = literal - 1) + // a, b, c, ...: the literal stream match src.len() { 0 => { return Ok(()) }, @@ -718,11 +764,6 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { loop { current = src[ii]; - //println!( - // "Start of loop - ii: {}/{}, prv: {}, cur: {}, l: {}, r: {}", - // ii, src_length, previous, current, literal, replicate - //); - // Run type switching/control if current == previous { if literal == 1 { @@ -731,8 +772,8 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { replicate = 1; } else if literal > 1 { // Write out literal run and reset + // `literal` must be at least 1 or we undeflow dst.push(literal - 1u8); - // Hmm, check the indexing here... dst.extend(&src[ii - usize::from(literal)..ii]); literal = 0; } @@ -763,7 +804,8 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { } else if literal == max_run_length { // Write out literal run and reset dst.push(127); - // The indexing is different here because ii is the `current` item, not previous + // The indexing is different here because ii is the `current` + // item, not previous dst.extend(&src[ii + 1 - usize::from(literal)..ii + 1]); literal = 0; } // 128 is noop! @@ -795,17 +837,3 @@ fn _encode_row(src: &[u8], dst: &mut Vec) -> Result<(), Box> { Ok(()) } - - -#[pymodule] -fn _rle(_: Python, m: &PyModule) -> PyResult<()> { - m.add_function(wrap_pyfunction!(parse_header, m)?).unwrap(); - m.add_function(wrap_pyfunction!(decode_segment, m)?).unwrap(); - m.add_function(wrap_pyfunction!(decode_frame, m)?).unwrap(); - - m.add_function(wrap_pyfunction!(encode_row, m)?).unwrap(); - m.add_function(wrap_pyfunction!(encode_segment, m)?).unwrap(); - m.add_function(wrap_pyfunction!(encode_frame, m)?).unwrap(); - - Ok(()) -} From e7d167b4d0e2c0fea711712042d045624bbfe57f Mon Sep 17 00:00:00 2001 From: scaramallion Date: Mon, 26 Apr 2021 13:41:30 +1000 Subject: [PATCH 17/19] Add polish and tests --- README.md | 16 ++-- rle/tests/test_encode.py | 57 ++++++++++++++ rle/tests/test_utils.py | 165 ++++++++++++++++++++++++++++++++++----- rle/utils.py | 164 ++++++++++++++++++++++++++------------ 4 files changed, 325 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index caf04b8..435adcb 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ Time per 1000 encodes, pydicom's NumPy RLE handler vs. pylibjpeg-rle #### Decoding ##### With pylibjpeg -Because pydicom defaults to the NumPy RLE decoder, you must specify the use +Because pydicom defaults to its own RLE decoder you must specify the use of pylibjpeg when decompressing: ```python from pydicom import dcmread @@ -92,21 +92,21 @@ for arr in generate_frames(ds): #### Encoding ##### Standalone with pydicom +Convert uncompressed pixel data to RLE encoding and save: ```python from pydicom import dcmread from pydicom.data import get_testdata_file -from pydicom.encaps import encapsulate from pydicom.uid import RLELossless -from rle import encode_array +from rle import pixel_data -# Get an uncompressed numpy ndarray +# Get the uncompressed pixel data ds = dcmread(get_testdata_file("OBXXXX1A.dcm")) arr = ds.pixel_array -# This is a bit silly -> switch ds in, kwargs pure optional -# `encode_array` is a generator function that yields encoded frames -ds.PixelData = encapsulate([enc for enc in encode_array(arr, **{'ds': ds})]) +# RLE encode and encapsulate `arr` +ds.PixelData = pixel_data(arr, ds) +# Set the correct *Transfer Syntax UID* ds.file_meta.TransferSyntaxUID = RLELossless - +ds.save_as('as_rle.dcm') ``` diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py index 22e44d5..258ed02 100644 --- a/rle/tests/test_encode.py +++ b/rle/tests/test_encode.py @@ -399,3 +399,60 @@ def test_32bit_segment_order(self): assert b'\x02\x00\xFF\xFC' == out[68:72] assert b'\x02\x00\xFE\x10' == out[72:76] assert b'\x02\x00\x0A\x12' == out[76:80] + + def test_invalid_samples_per_pixel_raises(self): + """Test exception raised if samples per pixel not valid.""" + msg = r"The \(0028,0002\) 'Samples per Pixel' must be 1 or 3" + with pytest.raises(ValueError, match=msg): + encode_frame(b'', 1, 1, 0, 1, '<') + + with pytest.raises(ValueError, match=msg): + encode_frame(b'', 1, 1, 2, 1, '<') + + with pytest.raises(ValueError, match=msg): + encode_frame(b'', 1, 1, 4, 1, '<') + + def test_invalid_bits_per_pixel_raises(self): + """Test exception raised if bits per pixel not valid.""" + msg = ( + r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64" + ) + with pytest.raises(ValueError, match=msg): + encode_frame(b'', 1, 1, 1, 0, '<') + + with pytest.raises(ValueError, match=msg): + encode_frame(b'', 1, 1, 1, 1, '<') + + with pytest.raises(ValueError, match=msg): + encode_frame(b'', 1, 1, 1, 7, '<') + + with pytest.raises(ValueError, match=msg): + encode_frame(b'', 1, 1, 1, 9, '<') + + with pytest.raises(ValueError, match=msg): + encode_frame(b'', 1, 1, 1, 65, '<') + + def test_invalid_byteorder_raises(self): + """Test exception raised if byteorder not valid.""" + msg = r"'byteorder' must be '>' or '<'" + with pytest.raises(ValueError, match=msg): + encode_frame(b'', 1, 1, 1, 16, '=') + + def test_invalid_parameters_raises(self): + """Test exception raised if parameters not valid.""" + msg = ( + r"The length of the data to be encoded is not consistent with the " + r"the values of the dataset's 'Rows', 'Columns', 'Samples per " + r"Pixel' and 'Bits Allocated' elements" + ) + with pytest.raises(ValueError, match=msg): + encode_frame(b'', 1, 1, 1, 16, '<') + + def test_invalid_nr_segments_raises(self): + """Test exception raised if too many segments required.""" + msg = ( + r"Unable to encode as the DICOM Standard only allows " + r"a maximum of 15 segments in RLE encoded data" + ) + with pytest.raises(ValueError, match=msg): + encode_frame(b'\x00' * 24, 1, 1, 3, 64, '<') diff --git a/rle/tests/test_utils.py b/rle/tests/test_utils.py index 6126d4c..d123f7b 100644 --- a/rle/tests/test_utils.py +++ b/rle/tests/test_utils.py @@ -5,26 +5,28 @@ try: from pydicom import dcmread + from pydicom.encaps import generate_pixel_data_frame from pydicom.pixel_data_handlers.rle_handler import _rle_decode_frame from pydicom.pixel_data_handlers.util import ( pixel_dtype, reshape_pixel_array ) + from pydicom.uid import RLELossless HAVE_PYDICOM = True except ImportError: HAVE_PYDICOM = False from rle.data import get_indexed_datasets -from rle.utils import encode_pixel_data, encode_array +from rle.utils import encode_pixel_data, encode_array, pixel_data, pixel_array INDEX_LEE = get_indexed_datasets('1.2.840.10008.1.2.1') +@pytest.mark.skipif(not HAVE_PYDICOM, reason="no pydicom") class TestEncodeArray: """Tests for utils.encode_array().""" def into_array(self, out, ds): - dtype = pixel_dtype(ds).newbyteorder('>') - arr = np.frombuffer(out, dtype) + arr = np.frombuffer(out, pixel_dtype(ds)) if ds.SamplesPerPixel == 1: arr = arr.reshape(ds.Rows, ds.Columns) @@ -39,7 +41,30 @@ def test_u8_1s_1f(self): """Test encoding 8-bit, 1 sample/px, 1 frame.""" ds = INDEX_LEE["OBXXXX1A.dcm"]['ds'] ref = ds.pixel_array - gen = encode_array(ref, **{'ds': ds}) + gen = encode_array(ref, ds) + b = next(gen) + + params = (ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated) + arr = self.into_array(_rle_decode_frame(b, *params), ds) + + assert np.array_equal(ref, arr) + + with pytest.raises(StopIteration): + next(gen) + + def test_u8_1s_1f_by_kwargs(self): + """Test encoding 8-bit, 1 sample/px, 1 frame by passing kwargs.""" + ds = INDEX_LEE["OBXXXX1A.dcm"]['ds'] + + kwargs = {} + kwargs['rows'] = ds.Rows + kwargs['columns'] = ds.Columns + kwargs['samples_per_px'] = ds.SamplesPerPixel + kwargs['bits_per_px'] = ds.BitsAllocated + kwargs['nr_frames'] = int(getattr(ds, "NumberOfFrames", 1)) + + ref = ds.pixel_array + gen = encode_array(ref, **kwargs) b = next(gen) params = (ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated) @@ -51,11 +76,11 @@ def test_u8_1s_1f(self): next(gen) def test_u32_3s_2f(self): - """Test encoding 32-bit, 3 sample/px, 2 frame.""" + """Test encoding LE-ordered 32-bit, 3 sample/px, 2 frame.""" ds = INDEX_LEE["SC_rgb_32bit_2frame.dcm"]['ds'] ref = ds.pixel_array - gen = encode_array(ref, **{'ds': ds}) + gen = encode_array(ref, ds) b = next(gen) params = (ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated) @@ -73,29 +98,131 @@ def test_u32_3s_2f(self): with pytest.raises(StopIteration): next(gen) - def test_byteorder(self): - pass + def test_u32_3s_2f_by_kwargs(self): + """Test encoding 32-bit, 3 sample/px, 2 frame by passing kwargs.""" + ds = INDEX_LEE["SC_rgb_32bit_2frame.dcm"]['ds'] + + kwargs = {} + kwargs['rows'] = ds.Rows + kwargs['columns'] = ds.Columns + kwargs['samples_per_px'] = ds.SamplesPerPixel + kwargs['bits_per_px'] = ds.BitsAllocated + kwargs['nr_frames'] = int(getattr(ds, "NumberOfFrames", 1)) + + ref = ds.pixel_array + gen = encode_array(ref, **kwargs) + b = next(gen) + + params = (ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated) + arr = self.into_array(_rle_decode_frame(b, *params), ds) + + assert np.array_equal(ref[0], arr) + + b = next(gen) - def test_missing_params(self): - pass + params = (ds.Rows, ds.Columns, ds.SamplesPerPixel, ds.BitsAllocated) + arr = self.into_array(_rle_decode_frame(b, *params), ds) + assert np.array_equal(ref[1], arr) + with pytest.raises(StopIteration): + next(gen) + + +@pytest.mark.skipif(not HAVE_PYDICOM, reason="no pydicom") class TestEncodePixelData: """Tests for utils.encode_pixel_data().""" - def test_missing_params(self): - pass - - def test_byteorder(self): - pass + def test_bad_byteorder_raises(self): + """Test exception raised if invalid byteorder.""" + kwargs = { + 'rows': 0, + 'columns': 0, + 'samples_per_px': 1, + 'bits_per_px': 16, + 'byteorder': '=', + } + + msg = ( + r"A valid 'byteorder' is required when the number of bits per " + r"pixel is greater than 8" + ) + with pytest.raises(ValueError, match=msg): + encode_pixel_data(b'', **kwargs) + + kwargs['byteorder'] = None + with pytest.raises(ValueError, match=msg): + encode_pixel_data(b'', **kwargs) def test_bad_samples_raises(self): - pass + """Test exception raised if invalid samples per pixel.""" + kwargs = { + 'rows': 0, + 'columns': 0, + 'samples_per_px': 0, + 'bits_per_px': 0, + 'byteorder': '<', + } + + msg = r"'samples_per_px' must be 1 or 3" + with pytest.raises(ValueError, match=msg): + encode_pixel_data(b'', **kwargs) def test_bad_bits_allocated_raises(self): - pass + """Test exception raised if invalid bits allocated.""" + kwargs = { + 'rows': 0, + 'columns': 0, + 'samples_per_px': 1, + 'bits_per_px': 2, + 'byteorder': '<', + } + + msg = r"'bits_per_px' must be 8, 16, 32 or 64" + with pytest.raises(ValueError, match=msg): + encode_pixel_data(b'', **kwargs) def test_bad_length_raises(self): - pass + """Test exception raised if invalid parameter values.""" + kwargs = { + 'rows': 1, + 'columns': 1, + 'samples_per_px': 1, + 'bits_per_px': 8, + 'byteorder': '<', + } + + msg = r"The length of the data doesn't match the image parameters" + with pytest.raises(ValueError, match=msg): + encode_pixel_data(b'', **kwargs) def test_too_many_segments_raises(self): - pass + """Test exception raised if too many segments.""" + kwargs = { + 'rows': 1, + 'columns': 1, + 'samples_per_px': 3, + 'bits_per_px': 64, + 'byteorder': '<', + } + + msg = ( + r"Unable to encode the data as the RLE format used by the DICOM " + r"Standard only allows a maximum of 15 segments" + ) + with pytest.raises(ValueError, match=msg): + encode_pixel_data(b'', **kwargs) + + +@pytest.mark.skipif(not HAVE_PYDICOM, reason="no pydicom") +class TestPixelData: + """Tests for utils.pixel_data().""" + def test_pixel_data(self): + """Test that data is encoded and encapsulated.""" + ds = INDEX_LEE["SC_rgb_32bit_2frame.dcm"]['ds'] + ref = ds.pixel_array + + data = pixel_data(ref, ds) + ds.file_meta.TransferSyntaxUID = RLELossless + ds.PixelData = data + + assert np.array_equal(ref, pixel_array(ds)) diff --git a/rle/utils.py b/rle/utils.py index bc94474..dea4003 100644 --- a/rle/utils.py +++ b/rle/utils.py @@ -1,57 +1,79 @@ """Utility functions.""" import sys -from typing import Generator +from typing import Generator, Optional import numpy as np from rle._rle import decode_frame, decode_segment, encode_frame, encode_segment -def decode_pixel_data(stream: bytes, ds: "Dataset") -> "np.ndarray": +def decode_pixel_data(src: bytes, ds: "Dataset", **kwargs) -> "np.ndarray": """Return the decoded RLE Lossless data as a :class:`numpy.ndarray`. Intended for use with *pydicom* ``Dataset`` objects. Parameters ---------- - stream : bytes - The image frame to be decoded. + src : bytes + A single encoded image frame to be decoded. ds : pydicom.dataset.Dataset A :class:`~pydicom.dataset.Dataset` containing the group ``0x0028`` - elements corresponding to the *Pixel Data*. + elements corresponding to the image frame. + kwargs : dict, optional + A dictionary containing options for the decoder. Current options are: + + * ``{'byteorder': str}`` specify the byte ordering for the decoded data + when more than 8 bits per pixel are used, should be '<' for little + endian ordering (default) or '>' for big-endian ordering. Returns ------- numpy.ndarray A 1D array of ``numpy.uint8`` containing the decoded frame data, - with little-endian encoding and planar configuration 1. + with planar configuration 1 and, by default, little-endian byte + ordering. Raises ------ ValueError If the decoding failed. """ + byteorder = kwargs.get('byteorder', '<') + return np.frombuffer( - decode_frame(stream, ds.Rows * ds.Columns, ds.BitsAllocated, '<'), + decode_frame(src, ds.Rows * ds.Columns, ds.BitsAllocated, byteorder), dtype='uint8' ) -def encode_array(arr: "np.ndarray", **kwargs) -> Generator[bytes, None, None]: +def encode_array( + arr: "np.ndarray", ds: Optional["Dataset"] = None, **kwargs +) -> Generator[bytes, None, None]: """Yield RLE encoded frames from `arr`. Parameters ---------- arr : numpy.ndarray - The array of data to be RLE encoded - kwargs : dict - A dictionary containing keyword arguments. Required keys are either: - - * ``{'ds': pydicom.dataset.Dataset}``, which is the corresponding - dataset, or - * ``{'rows': int, 'columns': int, samples_per_px': int, - 'bits_per_px': int, 'nr_frames': int}``. + The array of data to be RLE encoded, should be ordered as (frames, + rows, columns, planes), (rows, columns, planes), (frames, rows, + columns) or (rows, columns). + ds : pydicom.dataset.Dataset, optional + The dataset corresponding to `arr` with matching values for *Rows*, + *Columns*, *Samples per Pixel* and *Bits Allocated*. Required if + the array properties aren't specified using `kwargs`. + kwargs : dict, optional + A dictionary containing keyword arguments. Required if `ds` isn't used, + keys are: + + * ``{'rows': int, 'columns': int}`` the number of rows and columns + contained in `arr`. + * ``{samples_per_px': int}`` the number of samples per pixel, either + 1 for monochrome or 3 for RGB or similar data. + * ``{'bits_per_px': int}`` the number of bits needed to contain each + pixel, either 8, 16, 32 or 64. + * ``{'nr_frames': int}`` the number of frames in `arr`, required if + more than one frame is present. Yields ------ @@ -64,20 +86,27 @@ def encode_array(arr: "np.ndarray", **kwargs) -> Generator[bytes, None, None]: kwargs['byteorder'] = byteorder - if 'ds' in kwargs: - nr_frames = getattr(kwargs['ds'], "NumberOfFrames", 1) - else: - nr_frames = kwargs['nr_frames'] + if ds: + kwargs['rows'] = ds.Rows + kwargs['columns'] = ds.Columns + kwargs['samples_per_px'] = ds.SamplesPerPixel + kwargs['bits_per_px'] = ds.BitsAllocated + kwargs['nr_frames'] = int(getattr(ds, "NumberOfFrames", 1)) - if nr_frames > 1: + if kwargs['nr_frames'] > 1: for frame in arr: yield encode_pixel_data(frame.tobytes(), **kwargs) else: yield encode_pixel_data(arr.tobytes(), **kwargs) -def encode_pixel_data(stream: bytes, **kwargs) -> bytes: - """Return `stream` encoded using the DICOM RLE (PackBits) algorithm. +def encode_pixel_data( + src: bytes, + ds: Optional["Dataset"] = None, + byteorder: Optional[str] = None, + **kwargs +) -> bytes: + """Return `src` encoded using the DICOM RLE (PackBits) algorithm. .. warning:: @@ -87,57 +116,70 @@ def encode_pixel_data(stream: bytes, **kwargs) -> bytes: Parameters ---------- - stream : bytes - The image frame data to be RLE encoded. Only 1 frame can be encoded - at a time. + src : bytes + The data for a single image frame data to be RLE encoded. + ds : pydicom.dataset.Dataset, optional + The dataset corresponding to `src` with matching values for *Rows*, + *Columns*, *Samples per Pixel* and *Bits Allocated*. Required if + the frame properties aren't specified using `kwargs`. + byteorder : str, optional + Required if the samples per pixel is greater than 1 and the value is + not passed using `kwargs`. If `src` is in little-endian byte order + then ``'<'``, otherwise ``'>'`` for big-endian. kwargs : dict A dictionary containing keyword arguments. Required keys are: - * ``{'byteorder': str}``, required if the number of samples per - pixel is greater than 1. If `stream` is in little-endian byte order - then ``'<'``, otherwise ``'>'`` for big-endian. - - And either: - - * ``{'ds': pydicom.dataset.Dataset}``, which is the dataset - corresponding to `stream` with matching values for *Rows*, *Columns*, - *Samples per Pixel* and *Bits Allocated*, or - * ``{'rows': int, 'columns': int, samples_per_px': int, - 'bits_per_px': int}``. + * ``{'rows': int, 'columns': int}`` the number of rows and columns + contained in `src` + * ``{samples_per_px': int}`` the number of samples per pixel, either + 1 for monochrome or 3 for RGB or similar data. + * ``{'bits_per_px': int}`` the number of bits needed to contain each + pixel, either 8, 16, 32 or 64. + * ``{'byteorder': str}``, required if the samples per pixel is greater + than 1. If `src` is in little-endian byte order then ``'<'``, + otherwise ``'>'`` for big-endian. Returns ------- bytes The RLE encoded frame. """ - if 'ds' in kwargs: - ds = kwargs['ds'] + if ds: r, c = ds.Rows, ds.Columns bpp = ds.BitsAllocated spp = ds.SamplesPerPixel else: r, c = kwargs['rows'], kwargs['columns'] - bpp = kwargs['bits_per_pixel'] + bpp = kwargs['bits_per_px'] spp = kwargs['samples_per_px'] # Validate input - if bpp not in [8, 16, 32, 64]: - raise NotImplementedError("'Bits Allocated' must be 8, 16, 32 or 64") - if spp not in [1, 3]: - raise ValueError("'Samples per Pixel' must be 1 or 3") + src = "(0028,0002) 'Samples per Pixel'" if ds else "'samples_per_px'" + raise ValueError(src + " must be 1 or 3") + + if bpp not in [8, 16, 32, 64]: + src = "(0028,0100) 'Bits Allocated'" if ds else "'bits_per_px'" + raise ValueError(src + " must be 8, 16, 32 or 64") if bpp / 8 * spp > 15: raise ValueError( - "Unable to encode the data as the DICOM Standard blah blah" + "Unable to encode the data as the RLE format used by the DICOM " + "Standard only allows a maximum of 15 segments" ) - if len(stream) != (r * c * bpp / 8 * spp): + if bpp > 8 and byteorder not in ('<', '>'): raise ValueError( - "The length of the data doesn't not match" + "A valid 'byteorder' is required when the number of bits per " + "pixel is greater than 8" ) - return encode_frame(stream, r, c, spp, bpp, kwargs['byteorder']) + if len(src) != (r * c * bpp / 8 * spp): + raise ValueError( + "The length of the data doesn't match the image parameters" + ) + + return encode_frame(src, r, c, spp, bpp, byteorder) def generate_frames(ds: "Dataset", reshape: bool = True) -> "np.ndarray": @@ -217,8 +259,8 @@ def pixel_array(ds: "Dataset") -> "np.ndarray": ---------- ds : pydicom.dataset.Dataset The :class:`Dataset` containing an :dcm:`Image Pixel - ` module and the *Pixel Data* to be - converted. + ` module and the *RLE Lossless* encoded + *Pixel Data* to be decoded. Returns ------- @@ -242,3 +284,25 @@ def pixel_array(ds: "Dataset") -> "np.ndarray": arr[offset:offset + frame_len] = frame return reshape_pixel_array(ds, arr) + + +def pixel_data(arr: "np.ndarray", ds: "Dataset") -> bytes: + """Return `arr` as encapsulated and RLE encoded bytes. + + Parameters + ---------- + arr : numpy.ndarray + The :class:`~numpy.ndarray` to be encoded. + ds : pydicom.dataset.Dataset + The dataset corresponding to `arr` with matching values for *Rows*, + *Columns*, *Samples per Pixel* and *Bits Allocated*. + + Returns + ------- + bytes + The encapsulated and RLE encoded `arr`, ready to be used to set + the dataset's *Pixel Data* element. + """ + from pydicom.encaps import encapsulate + + return encapsulate([ii for ii in encode_array(arr, ds)]) From 57eccac4a43073fde5089a3d1076b699cc7c9cd8 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Mon, 26 Apr 2021 13:51:16 +1000 Subject: [PATCH 18/19] Update docs --- README.md | 66 +++++++++++++++++------------------ docs/release_notes/v1.1.0.rst | 4 ++- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 10252d5..6ec3d22 100644 --- a/README.md +++ b/README.md @@ -27,39 +27,6 @@ python -m setup.py develop | --- | --- | --- | --- | | 1.2.840.10008.1.2.5 | RLE Lossless | Yes | Yes | -### Benchmarks -#### Decoding - -Time per 1000 decodes, pydicom's default RLE handler vs. pylibjpeg-rle - -| Dataset | Pixels | Bytes | pydicom | pylibjpeg-rle | -| --- | --- | --- | --- | --- | -| OBXXXX1A_rle.dcm | 480,000 | 480,000 | 4.89 s | 0.79 s | -| OBXXXX1A_rle_2frame.dcm | 960,000 | 960,000 | 9.89 s | 1.65 s | -| SC_rgb_rle.dcm | 10,000 | 30,000 | 0.20 s | 0.15 s | -| SC_rgb_rle_2frame.dcm | 20,000 | 60,000 | 0.32 s | 0.18 s | -| MR_small_RLE.dcm | 4,096 | 8,192 | 0.35 s | 0.13 s | -| emri_small_RLE.dcm | 40,960 | 81,920 | 1.13 s | 0.28 s | -| SC_rgb_rle_16bit.dcm | 10,000 | 60,000 | 0.33 s | 0.17 s | -| SC_rgb_rle_16bit_2frame.dcm | 20,000 | 120,000 | 0.56 s | 0.21 s | -| rtdose_rle_1frame.dcm | 100 | 400 | 0.12 s | 0.13 s | -| rtdose_rle.dcm | 1,500 | 6,000 | 0.53 s | 0.26 s | -| SC_rgb_rle_32bit.dcm | 10,000 | 120,000 | 0.56 s | 0.19 s | -| SC_rgb_rle_32bit_2frame.dcm | 20,000 | 240,000 | 1.03 s | 0.28 s | - -#### Encoding - -Time per 1000 encodes, pydicom's NumPy RLE handler vs. pylibjpeg-rle - -| Dataset | Pixels | Bytes | NumPy | pylibjpeg-rle | -| --- | --- | --- | --- | --- | -| OBXXXX1A.dcm | 480,000 | 480,000 | 30.7 s | 1.36 s | -| SC_rgb.dcm | 10,000 | 30,000 | 1.80 s | 0.09 s | -| MR_small.dcm | 4,096 | 8,192 | 2.29 s | 0.04 s | -| SC_rgb_16bit.dcm | 10,000 | 60,000 | 3.57 s | 0.17 s | -| rtdose_1frame.dcm | 100 | 400 | 0.19 s | 0.003 s | -| SC_rgb_32bit.dcm | 10,000 | 120,000 | 7.20 s | 0.33 s | - ### Usage #### Decoding ##### With pylibjpeg @@ -110,3 +77,36 @@ ds.PixelData = pixel_data(arr, ds) ds.file_meta.TransferSyntaxUID = RLELossless ds.save_as('as_rle.dcm') ``` + +### Benchmarks +#### Decoding + +Time per 1000 decodes, pydicom's default RLE handler vs. pylibjpeg-rle + +| Dataset | Pixels | Bytes | pydicom | pylibjpeg-rle | +| --- | --- | --- | --- | --- | +| OBXXXX1A_rle.dcm | 480,000 | 480,000 | 4.89 s | 0.79 s | +| OBXXXX1A_rle_2frame.dcm | 960,000 | 960,000 | 9.89 s | 1.65 s | +| SC_rgb_rle.dcm | 10,000 | 30,000 | 0.20 s | 0.15 s | +| SC_rgb_rle_2frame.dcm | 20,000 | 60,000 | 0.32 s | 0.18 s | +| MR_small_RLE.dcm | 4,096 | 8,192 | 0.35 s | 0.13 s | +| emri_small_RLE.dcm | 40,960 | 81,920 | 1.13 s | 0.28 s | +| SC_rgb_rle_16bit.dcm | 10,000 | 60,000 | 0.33 s | 0.17 s | +| SC_rgb_rle_16bit_2frame.dcm | 20,000 | 120,000 | 0.56 s | 0.21 s | +| rtdose_rle_1frame.dcm | 100 | 400 | 0.12 s | 0.13 s | +| rtdose_rle.dcm | 1,500 | 6,000 | 0.53 s | 0.26 s | +| SC_rgb_rle_32bit.dcm | 10,000 | 120,000 | 0.56 s | 0.19 s | +| SC_rgb_rle_32bit_2frame.dcm | 20,000 | 240,000 | 1.03 s | 0.28 s | + +#### Encoding + +Time per 1000 encodes, pydicom's default RLE handler vs. pylibjpeg-rle + +| Dataset | Pixels | Bytes | NumPy | pylibjpeg-rle | +| --- | --- | --- | --- | --- | +| OBXXXX1A.dcm | 480,000 | 480,000 | 30.7 s | 1.36 s | +| SC_rgb.dcm | 10,000 | 30,000 | 1.80 s | 0.09 s | +| MR_small.dcm | 4,096 | 8,192 | 2.29 s | 0.04 s | +| SC_rgb_16bit.dcm | 10,000 | 60,000 | 3.57 s | 0.17 s | +| rtdose_1frame.dcm | 100 | 400 | 0.19 s | 0.003 s | +| SC_rgb_32bit.dcm | 10,000 | 120,000 | 7.20 s | 0.33 s | diff --git a/docs/release_notes/v1.1.0.rst b/docs/release_notes/v1.1.0.rst index 67fb2b7..7d069a4 100644 --- a/docs/release_notes/v1.1.0.rst +++ b/docs/release_notes/v1.1.0.rst @@ -8,6 +8,8 @@ Enhancements * Added support for *RLE Lossless* encoding of *Pixel Data* * Added :func:`~rle.utils.encode_array` generator for standalone encoding +* Added :func:`~rle.utils.pixel_data` function for encoding and encapsulating + a numpy ndarray * Added :func:`~rle.utils.encode_pixel_data` entry point for encoding * Added the ability to return decoded data in either little or big endian ordering @@ -17,4 +19,4 @@ Changes * :func:`~rle.utils.pixel_array`, :func:`~rle.utils.generate_frames` and :func:`~rle.utils.decode_pixel_data` now return little-endian ordered - ndarrays + ndarrays by default From e01019338010d3e4fa7383b2753b6e263b87e963 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Mon, 26 Apr 2021 13:54:04 +1000 Subject: [PATCH 19/19] Add version added to docs --- rle/_version.py | 2 +- rle/utils.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/rle/_version.py b/rle/_version.py index a38ac7f..b5cccd7 100644 --- a/rle/_version.py +++ b/rle/_version.py @@ -3,7 +3,7 @@ import re -__version__ = '1.1.0.dev0' +__version__ = '1.1.0' VERSION_PATTERN = r""" diff --git a/rle/utils.py b/rle/utils.py index dea4003..2ceafb3 100644 --- a/rle/utils.py +++ b/rle/utils.py @@ -52,6 +52,8 @@ def encode_array( ) -> Generator[bytes, None, None]: """Yield RLE encoded frames from `arr`. + .. versionadded:: 1.1 + Parameters ---------- arr : numpy.ndarray @@ -108,6 +110,8 @@ def encode_pixel_data( ) -> bytes: """Return `src` encoded using the DICOM RLE (PackBits) algorithm. + .. versionadded:: 1.1 + .. warning:: *Samples per Pixel* x *Bits Allocated* must be less than or equal @@ -289,6 +293,8 @@ def pixel_array(ds: "Dataset") -> "np.ndarray": def pixel_data(arr: "np.ndarray", ds: "Dataset") -> bytes: """Return `arr` as encapsulated and RLE encoded bytes. + .. versionadded:: 1.1 + Parameters ---------- arr : numpy.ndarray