Skip to content

Commit

Permalink
fix(extractor): convert bytes to string for h5py >= 3.0 #40
Browse files Browse the repository at this point in the history
  • Loading branch information
fabianbalsiger committed Oct 8, 2021
1 parent d63bbc6 commit c87cc56
Showing 1 changed file with 27 additions and 7 deletions.
34 changes: 27 additions & 7 deletions pymia/data/extraction/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:


class NamesExtractor(Extractor):

def __init__(self, cache: bool = True, categories=(defs.KEY_IMAGES, defs.KEY_LABELS)) -> None:
"""Extracts the names of the entries within a category (e.g. "Flair", "T1" for the category "images").
Expand All @@ -71,7 +72,7 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
d = self.cached_result

for k, v in d.items():
extracted[k] = v
extracted[k] = _convert_to_string(v)

def _extract(self, reader: rd.Reader):
d = {}
Expand All @@ -93,7 +94,7 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
"""see :meth:`.Extractor.extract`"""
extracted[defs.KEY_SUBJECT_INDEX] = params[defs.KEY_SUBJECT_INDEX]
subject_index_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])
extracted[defs.KEY_SUBJECT] = reader.read(defs.LOC_SUBJECT, subject_index_expr)
extracted[defs.KEY_SUBJECT] = _convert_to_string(reader.read(defs.LOC_SUBJECT, subject_index_expr))


class IndexingExtractor(Extractor):
Expand Down Expand Up @@ -195,11 +196,11 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
else:
file_root = self.cached_file_root

extracted[defs.KEY_FILE_ROOT] = file_root
extracted[defs.KEY_FILE_ROOT] = _convert_to_string(file_root)

for category in self.categories:
extracted[defs.KEY_PLACEHOLDER_FILES.format(category)] = reader.read(defs.LOC_FILES_PLACEHOLDER.format(category),
subject_index_expr)
extracted[defs.KEY_PLACEHOLDER_FILES.format(category)] = _convert_to_string(
reader.read(defs.LOC_FILES_PLACEHOLDER.format(category), subject_index_expr))


class SelectiveDataExtractor(Extractor):
Expand Down Expand Up @@ -466,12 +467,12 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
subject_index_expr = expr.IndexExpression(params[defs.KEY_SUBJECT_INDEX])

if self.cached_file_root is None:
self.cached_file_root = reader.read(defs.LOC_FILES_ROOT)
self.cached_file_root = _convert_to_string(reader.read(defs.LOC_FILES_ROOT))

file_root = self.cached_file_root

for category in self.categories:
rel_file_paths = reader.read(defs.LOC_FILES_PLACEHOLDER.format(category), subject_index_expr)
rel_file_paths = _convert_to_string(reader.read(defs.LOC_FILES_PLACEHOLDER.format(category), subject_index_expr))

loaded = []
for rel_file_path in rel_file_paths:
Expand All @@ -481,3 +482,22 @@ def extract(self, reader: rd.Reader, params: dict, extracted: dict) -> None:
if not self.ignore_indexing:
data = data[index_expr.expression]
extracted[category] = data


def _convert_to_string(data):
    """Converts extracted string data from bytes to string, as strings are handled as bytes since h5py >= 3.0.

    The function has been introduced as part of an `issue <https://github.com/rundherum/pymia/issues/40>`_.

    Containers are converted recursively, so nested lists/tuples of :obj:`bytes` are handled as well.
    Anything that is neither :obj:`bytes` nor a list/tuple (e.g., an already decoded :obj:`str`) is
    passed through unchanged, which makes the function safe to apply to already converted (cached) data.

    Args:
        data: The data to be converted; either :obj:`bytes` or a (possibly nested) list or tuple of :obj:`bytes`.

    Returns:
        The converted data as :obj:`str`, a list of :obj:`str` (for list input), or a tuple of :obj:`str`
        (for tuple input). Any other data is returned as is.

    Raises:
        UnicodeDecodeError: If a :obj:`bytes` entry is not valid UTF-8.
    """
    if isinstance(data, bytes):
        return data.decode('utf-8')
    if isinstance(data, list):
        return [_convert_to_string(d) for d in data]
    if isinstance(data, tuple):
        # tuples were previously returned undecoded; convert them too but keep them a tuple
        return tuple(_convert_to_string(d) for d in data)
    # not string-like data (or already a str): nothing to convert
    return data

0 comments on commit c87cc56

Please sign in to comment.