Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] Add RLE encoding #730

Merged
merged 13 commits into from Oct 11, 2018
24 changes: 23 additions & 1 deletion pydicom/benchmarks/bench_handler_rle.py
@@ -1,12 +1,16 @@
# Copyright 2008-2018 pydicom authors. See LICENSE file for details.
"""Benchmarks for the rle_handler module."""

import numpy as np

from pydicom import dcmread
from pydicom.data import get_testdata_files
from pydicom.encaps import decode_data_sequence
from pydicom.encaps import decode_data_sequence, defragment_data
from pydicom.pixel_data_handlers.rle_handler import (
get_pixeldata,
_rle_decode_frame,
_rle_encode_segment,
_rle_decode_segment,
)


Expand Down Expand Up @@ -108,3 +112,21 @@ def time_32bit_3sample(self):
"""Time retrieval of 32-bit, 3 sample/pixel RLE data."""
for ii in range(self.no_runs):
get_pixeldata(self.ds_32_3_1)


class TimeRLEEncodeSegment(object):
    """Benchmark rle_handler._rle_encode_segment on a decoded test segment."""

    def setup(self):
        """Decode one segment from the test dataset so it can be re-encoded."""
        dataset = dcmread(OB_RLE_1F)
        frame_bytes = defragment_data(dataset.PixelData)
        # Drop the first 64 bytes (the RLE header) and decode what follows
        segment = _rle_decode_segment(frame_bytes[64:])
        n_pixels = dataset.Rows * dataset.Columns
        assert n_pixels == len(segment)

        as_uint8 = np.frombuffer(segment, 'uint8')
        self.arr = as_uint8.reshape(dataset.Rows, dataset.Columns)
        self.no_runs = 100

    def time_encode(self):
        """Time encoding a full segment."""
        # Repeatedly re-encode the array prepared in setup()
        for _ in range(self.no_runs):
            _rle_encode_segment(self.arr)
179 changes: 178 additions & 1 deletion pydicom/pixel_data_handlers/rle_handler.py
Expand Up @@ -33,7 +33,8 @@

"""

from struct import unpack
from itertools import groupby
from struct import pack, unpack

import numpy as np

Expand Down Expand Up @@ -345,3 +346,179 @@ def _rle_decode_segment(data):
pass

return result


# RLE encoding functions
def rle_encode(arr):
    """Return the contents of `arr` as a list of RLE encoded bytearray.

    Parameters
    ----------
    arr : numpy.ndarray
        A 2D, 3D or 4D numpy ndarray containing one or more frames of image
        data. A 2D or 3D array is encoded as a single frame, any other array
        is iterated over its first axis and each item is encoded as a frame.

    Returns
    -------
    list of bytearray
        A list of RLE encoded frames ready for encapsulation.
    """
    # FIXME: a 3D array is always treated as one frame, so multi-frame data
    #   with a single sample per pixel passed as 3D is not split into frames
    if len(arr.shape) in (2, 3):
        return [_rle_encode_frame(arr)]

    # Encode each frame on its own (not the whole array once per frame)
    return [_rle_encode_frame(frame) for frame in arr]


def _rle_encode_frame(arr):
"""Return an numpy ndarray image frame as RLE encoded bytearray.

Parameters
----------
arr : numpy.ndarray
A 2D (if Samples Per Pixel = 1) or 3D (if Samples Per Pixel = 3)
ndarray containing a single frame of the image to be RLE encoded.

Returns
-------
bytearray
An RLE encoded frame, including the RLE header, following the format
specified by the DICOM Standard, Part 5, Annex G.
"""
rle_data = bytearray()
seg_lengths = []
if len(arr.shape) == 3:
# Samples Per Pixel = 3
for plane in arr:
# Bluh, should use a generator here
for segment in _rle_encode_plane(plane):
rle_data.extend(segment)
seg_lengths.append(len(segment))
else:
# Samples Per Pixel = 1
for segment in _rle_encode_plane(plane):
rle_data.extend(segment)
seg_lengths.append(len(segment))

# Add the number of segments to the header
rle_header = bytearray(pack('<L', len(seg_lengths)))

# Add the segment offsets, starting at 64 for the first segment
# We don't need an offset to any data at the end of the last segment
offsets = [64]
for ii, length in enumerate(seg_lengths[:-1]):
offsets.append(offsets[ii] + length)
rle_header.extend(pack('<{}L'.format(len(offsets)), *offsets))

# Add trailing padding to make up the rest of the header (if required)
rle_header.extend(b'\x00' * (64 - len(rle_header)))

return rle_header.extend(rle_data)


def _rle_encode_plane(arr):
    """Yield RLE encoded segments from an image plane as bytearray.

    Parameters
    ----------
    arr : numpy.ndarray
        A 2D ndarray containing a single plane of the image data to be RLE
        encoded. The dtype of the array should be a multiple of 8 (i.e. uint8,
        uint32, int16, etc.). Any byte order and memory layout is accepted.

    Yields
    ------
    bytearray
        An RLE encoded segment of the plane, following the format specified
        by the DICOM Standard, Part 5, Annex G. Segments are yielded from
        the most significant byte of the samples to the least significant.
    """
    # The RLE segments must be ordered most significant byte first, so make
    #   a C-contiguous big endian copy of the data. This also allows
    #   non-contiguous input (such as a slice) to be re-viewed as uint8.
    big_endian = np.ascontiguousarray(
        arr, dtype=arr.dtype.newbyteorder('>')
    )

    # Re-view the N-bit array data as N / 8 x uint8s
    arr8 = big_endian.view(np.uint8)

    # Every `bytes_per_sample`-th column of `arr8`, starting at column `ii`,
    #   holds byte `ii` of each sample, i.e. one Byte Segment
    bytes_per_sample = arr.dtype.itemsize
    for ii in range(bytes_per_sample):
        yield _rle_encode_segment(arr8[:, ii::bytes_per_sample])


def _rle_encode_segment(arr):
"""Return a 2D numpy ndarray as an RLE encoded bytearray.

Parameters
----------
arr : numpy.ndarray
A 2D ndarray of 8-bit uint data, representing a Byte Segment as in
the DICOM Standard, Part 5, Annex G.2.

Returns
-------
bytearray
The RLE encoded segment, following the format specified by the DICOM
Standard. Odd length encoded segments are padded by a trailing 0x00
to be even length.
"""
out = bytearray()
for row in arr:
out.extend(_rle_encode_row(row))

# Pad odd length data with a trailing 0x00 byte
out.extend(b'\x00' * (len(out) % 2))

return out


def _rle_encode_row(arr):
"""Return a numpy array as an RLE encoded bytearray.

Parameters
----------
arr : numpy.ndarray
A 1D ndarray of 8-bit uint data.

Returns
-------
bytes
The RLE encoded row, following the format specified by the DICOM
Standard, Part 5, Annex G.
"""
out = []
out_append = out.append
out_extend = out.extend

literal = []
for key, group in groupby(arr.astype('uint8').tolist()):
group = list(group)
if len(group) == 1:
literal.append(group[0])
else:
if literal:
# Literal runs
for ii in range(0, len(literal), 128):
_run = literal[ii:ii + 128]
out_append(len(_run) - 1)
out_extend(_run)

literal = []

# Replicate run
for ii in range(0, len(group), 128):
if len(group[ii:ii + 128]) > 1:
# Replicate run
out_append(257 - len(group[ii:ii + 128]))
out_append(group[0])
else:
# Literal run only if last replicate is all alone
out_append(0)
out_append(group[0])

# Finally literal run if literal isn't followed by a replicate run
for ii in range(0, len(literal), 128):
_run = literal[ii:ii + 128]
out_append(len(_run) - 1)
out_extend(_run)

return pack('{}B'.format(len(out)), *out)