Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optional 'encoding' argument to import_pickle #723

Merged
merged 2 commits into from Aug 14, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
63 changes: 60 additions & 3 deletions menpo/io/input/base.py
Expand Up @@ -325,7 +325,7 @@ def import_video(filepath, landmark_resolver=same_name_video, normalize=None,
"""
normalize = _parse_deprecated_normalise(normalise, normalize)

kwargs = {'normalize': normalize, 'exact_frame_count':exact_frame_count}
kwargs = {'normalize': normalize, 'exact_frame_count': exact_frame_count}

video_importer_methods = {'ffmpeg': ffmpeg_video_types}
if importer_method not in video_importer_methods:
Expand Down Expand Up @@ -358,7 +358,7 @@ def import_landmark_file(filepath, asset=None):
return _import(filepath, image_landmark_types, asset=asset)


def import_pickle(filepath):
def import_pickle(filepath, **kwargs):
r"""Import a pickle file of arbitrary Python objects.

Menpo unambiguously uses ``.pkl`` as its choice of extension for Pickle
Expand All @@ -378,7 +378,64 @@ def import_pickle(filepath):
object : `object`
Whatever Python objects are present in the Pickle file
"""
return _import(filepath, pickle_types)
return _import(filepath, pickle_types, importer_kwargs=kwargs)


def import_pickles(pattern, max_pickles=None, shuffle=False,
                   as_generator=False, verbose=False, **kwargs):
    r"""Import every pickle file that matches a glob pattern.

    Menpo unambiguously uses ``.pkl`` as its choice of extension for Pickle
    files. Gzip compressed pickle files are also supported for automatic
    importing and exporting - a ``filepath`` ending in ``pkl.gz`` will have
    gzip compression applied automatically. Compression can massively reduce
    the filesize of a pickle file, at the cost of longer import and export
    times.

    Note that this function returns a :map:`LazyList`. It therefore returns
    immediately, and a pickle is only loaded when the returned list is
    indexed. To load all of the pickles up front, wrap the returned
    :map:`LazyList` in a Python `list`.

    Parameters
    ----------
    pattern : `str`
        A glob path pattern to search for pickles. Every pickle found to
        match the glob will be imported one by one. See :map:`pickle_paths`
        for more details of what pickles will be found.
    max_pickles : positive `int`, optional
        If not ``None``, only import the first ``max_pickles`` found. Else,
        import all.
    shuffle : `bool`, optional
        If ``True``, the order of the returned pickles will be randomised. If
        ``False``, the order of the returned pickles will be alphanumerically
        ordered.
    as_generator : `bool`, optional
        If ``True``, the function returns a generator and assets will be
        yielded one after another when the generator is iterated over.
    verbose : `bool`, optional
        If ``True`` progress of the importing will be dynamically reported
        with a progress bar.
    **kwargs : `dict`, optional
        Any additional keyword arguments. They are forwarded unchanged as
        ``importer_kwargs``.

    Returns
    -------
    lazy_list : :map:`LazyList` or generator of Python objects
        A :map:`LazyList` or generator yielding whatever Python objects are
        present in the Pickle file instances that match the glob pattern
        provided.

    Raises
    ------
    ValueError
        If no pickles are found at the provided glob.
    """
    # Gather the glob-import options first so the delegation stays short.
    glob_options = dict(max_assets=max_pickles, shuffle=shuffle,
                        as_generator=as_generator, verbose=verbose,
                        importer_kwargs=kwargs)
    return _import_glob_lazy_list(pattern, pickle_types, **glob_options)


def import_images(pattern, max_images=None, shuffle=False,
Expand Down
13 changes: 11 additions & 2 deletions menpo/io/input/pickle.py
@@ -1,10 +1,19 @@
import sys
try:
import cPickle as pickle
except ImportError:
import pickle
import gzip


def _unpickle_with_encoding(f, encoding=None):
    """Unpickle one object from the open binary file ``f``.

    Parameters
    ----------
    f : file-like
        Object opened for binary reading, handed straight to ``pickle.load``.
    encoding : `str`, optional
        If not ``None`` and running on Python 3.x, passed to ``pickle.load``
        as its ``encoding`` keyword. Ignored on Python 2.x.

    Returns
    -------
    object : `object`
        Whatever ``pickle.load`` returns for ``f``.
    """
    # The ``encoding`` kwarg is supported on Python 3.x only, so fall back to
    # a plain load when none was requested or when running on Python 2.
    encoding_requested = encoding is not None
    if not encoding_requested or sys.version_info.major <= 2:
        return pickle.load(f)
    return pickle.load(f, encoding=encoding)


def pickle_importer(filepath, asset=None, **kwargs):
r"""Import a pickle file.

Expand All @@ -24,7 +33,7 @@ def pickle_importer(filepath, asset=None, **kwargs):
The pickled objects.
"""
with open(str(filepath), 'rb') as f:
x = pickle.load(f)
x = _unpickle_with_encoding(f, encoding=kwargs.get('encoding'))
return x


Expand All @@ -47,5 +56,5 @@ def pickle_gzip_importer(filepath, asset=None, **kwargs):
The pickled objects.
"""
with gzip.open(str(filepath), 'rb') as f:
x = pickle.load(f)
x = _unpickle_with_encoding(f, encoding=kwargs.get('encoding'))
return x
28 changes: 28 additions & 0 deletions menpo/io/test/io_import_test.py
Expand Up @@ -457,6 +457,34 @@ def test_importing_pickle(is_file, mock_open, mock_pickle):
assert objs['test'] == 1


@patch('menpo.io.input.pickle.pickle.load')
@patch('{}.open'.format(builtins_str))
@patch('menpo.io.input.base.Path.is_file')
@patch('sys.version_info')
def test_importing_pickle_encoding_py3(version_info, is_file, mock_open,
                                       mock_pickle):
    """With a Python 3 version reported, ``encoding`` reaches pickle.load."""
    # Arrange: the path "exists", pickle.load is stubbed, version reads as 3.
    is_file.return_value = True
    mock_pickle.return_value = {'test': 1}
    version_info.major = 3

    mio.import_pickle('mocked.pkl', encoding='latin1')

    # Index 1 of call_args holds the keyword arguments of the last call.
    load_kwargs = mock_pickle.call_args[1]
    assert load_kwargs.get('encoding') == 'latin1'


@patch('menpo.io.input.pickle.pickle.load')
@patch('{}.open'.format(builtins_str))
@patch('menpo.io.input.base.Path.is_file')
@patch('sys.version_info')
def test_importing_pickle_encoding_ignored_py2(version_info, is_file, mock_open,
                                               mock_pickle):
    """With a Python 2 version reported, ``encoding`` is not forwarded."""
    # Arrange: the path "exists", pickle.load is stubbed, version reads as 2.
    is_file.return_value = True
    mock_pickle.return_value = {'test': 1}
    version_info.major = 2

    mio.import_pickle('mocked.pkl', encoding='latin1')

    # Index 1 of call_args holds the keyword arguments of the last call.
    load_kwargs = mock_pickle.call_args[1]
    assert 'encoding' not in load_kwargs


@patch('menpo.io.input.pickle.pickle.load')
@patch('{}.open'.format(builtins_str))
@patch('menpo.io.input.base.Path.glob')
Expand Down