Skip to content

Commit

Permalink
fix(reader): convert bytes to string for h5py >= 3.0 #40
Browse files Browse the repository at this point in the history
  • Loading branch information
fabianbalsiger committed Feb 25, 2022
1 parent 446192d commit f49588d
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 26 deletions.
19 changes: 19 additions & 0 deletions pymia/data/extraction/byte_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@


def convert_to_string(data):
    """Decode extracted byte strings into regular Python strings.

    Since h5py >= 3.0, strings are handled as bytes, so string data read from
    a dataset has to be decoded explicitly. The function has been introduced
    as part of an `issue <https://github.com/rundherum/pymia/issues/40>`_.

    Args:
        data: The data to be converted; either :obj:`bytes` or list of :obj:`bytes`.

    Returns:
        The converted data as :obj:`str` or list of :obj:`str`. Anything that is
        neither :obj:`bytes` nor a list is returned unchanged.
    """
    if isinstance(data, list):
        # Convert element-wise; recursing also covers nested lists.
        return [convert_to_string(entry) for entry in data]
    if isinstance(data, bytes):
        return data.decode('utf-8')
    # Not string-like data (e.g. numbers, arrays, already-decoded str): pass through.
    return data
34 changes: 9 additions & 25 deletions pymia/data/extraction/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import pymia.data.conversion as conv
import pymia.data.definition as defs
import pymia.data.extraction.byte_converter as byte_converter
import pymia.data.indexexpression as expr
from . import reader as rd

Expand Down Expand Up @@ -72,7 +73,7 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
d = self.cached_result

for k, v in d.items():
extracted[k] = _convert_to_string(v)
extracted[k] = byte_converter.convert_to_string(v)

def _extract(self, reader: rd.Reader):
d = {}
Expand All @@ -94,7 +95,7 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
"""see :meth:`.Extractor.extract`"""
extracted[defs.KEY_SUBJECT_INDEX] = params[defs.KEY_SUBJECT_INDEX]
subject_index_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])
extracted[defs.KEY_SUBJECT] = _convert_to_string(reader.read(defs.LOC_SUBJECT, subject_index_expr))
extracted[defs.KEY_SUBJECT] = byte_converter.convert_to_string(reader.read(defs.LOC_SUBJECT, subject_index_expr))


class IndexingExtractor(Extractor):
Expand Down Expand Up @@ -196,10 +197,10 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
else:
file_root = self.cached_file_root

extracted[defs.KEY_FILE_ROOT] = _convert_to_string(file_root)
extracted[defs.KEY_FILE_ROOT] = byte_converter.convert_to_string(file_root)

for category in self.categories:
extracted[defs.KEY_PLACEHOLDER_FILES.format(category)] = _convert_to_string(
extracted[defs.KEY_PLACEHOLDER_FILES.format(category)] = byte_converter.convert_to_string(
reader.read(defs.LOC_FILES_PLACEHOLDER.format(category), subject_index_expr))


Expand Down Expand Up @@ -303,6 +304,7 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:

random_index = [np.random.choice(selection_indices)] # as list to keep the last dimension with np.take
extracted[self.category] = np.take(data, random_index, axis=-1)
# todo(fabianbalsiger): add names selected similar to SelectiveDataExtractor


class ImagePropertyShapeExtractor(Extractor):
Expand Down Expand Up @@ -364,7 +366,7 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
data = reader.read('{}/{}'.format(defs.LOC_DATA_PLACEHOLDER.format(category), index_str))
else:
data = reader.read('{}/{}'.format(defs.LOC_DATA_PLACEHOLDER.format(category), index_str), index_expr)
extracted[category] = data
extracted[category] = byte_converter.convert_to_string(data)


class PadDataExtractor(Extractor):
Expand Down Expand Up @@ -471,12 +473,12 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
subject_index_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])

if self.cached_file_root is None:
self.cached_file_root = _convert_to_string(reader.read(defs.LOC_FILES_ROOT))
self.cached_file_root = byte_converter.convert_to_string(reader.read(defs.LOC_FILES_ROOT))

file_root = self.cached_file_root

for category in self.categories:
rel_file_paths = _convert_to_string(reader.read(defs.LOC_FILES_PLACEHOLDER.format(category), subject_index_expr))
rel_file_paths = byte_converter.convert_to_string(reader.read(defs.LOC_FILES_PLACEHOLDER.format(category), subject_index_expr))

loaded = []
for rel_file_path in rel_file_paths:
Expand All @@ -487,21 +489,3 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
data = data[index_expr.expression]
extracted[category] = data


def _convert_to_string(data):
    """Turn byte strings obtained during extraction into :obj:`str`.

    Strings are handled as bytes since h5py >= 3.0, which makes this explicit
    decoding step necessary. The function has been introduced as part of an
    `issue <https://github.com/rundherum/pymia/issues/40>`_.

    Args:
        data: The data to be converted; either :obj:`bytes` or list of :obj:`bytes`.

    Returns:
        The converted data as :obj:`str` or list of :obj:`str`. Data of any other
        type is handed back as is.
    """
    if not isinstance(data, (bytes, list)):
        # Nothing to decode.
        return data
    if isinstance(data, bytes):
        return data.decode('utf-8')
    # A list: apply the conversion to every element (recursively for nested lists).
    return list(map(_convert_to_string, data))
3 changes: 2 additions & 1 deletion pymia/data/extraction/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np

import pymia.data.definition as defs
import pymia.data.extraction.byte_converter as byte_converter
import pymia.data.indexexpression as expr


Expand Down Expand Up @@ -120,7 +121,7 @@ def get_shape(self, subject_index: int) -> list:

def get_subjects(self) -> list:
"""see :meth:`.Reader.get_subjects`"""
return self.read(defs.LOC_SUBJECT)
return byte_converter.convert_to_string(self.read(defs.LOC_SUBJECT))

def read(self, entry: str, index: expr.IndexExpression = None):
"""see :meth:`.Reader.read`"""
Expand Down

0 comments on commit f49588d

Please sign in to comment.