
ENH: Update joblib

This update mainly fixes a heisenbug in Parallel's doctests.
1 parent 39149ea · commit c88b5bcca53140f558249e8972c847fad76172d1 · @GaelVaroquaux committed Apr 29, 2011
…/joblib/__init__.py
@@ -96,7 +96,7 @@
"""
-__version__ = '0.5.0a'
+__version__ = '0.5.1'
from .memory import Memory
…/joblib/disk.py
@@ -7,16 +7,17 @@
# License: BSD Style, 3 clauses.
-import platform
+import sys
import os
import shutil
+import time
def disk_used(path):
""" Return the disk usage in a directory.
"""
size = 0
for file in os.listdir(path) + ['.']:
- stat = os.stat(os.path.join(path, file))
+ stat = os.stat(os.path.join(path, file))
if hasattr(stat, 'st_blocks'):
size += stat.st_blocks * 512
else:
@@ -25,31 +26,38 @@ def disk_used(path):
size += (stat.st_size // 512 + 1) * 512;
# We need to convert to int to avoid having longs on some systems (we
# don't want longs to avoid problems we SQLite)
- return int(size/1024.)
+ return int(size / 1024.)
def memstr_to_kbytes(text):
""" Convert a memory text to it's value in kilobytes.
"""
kilo = 1024
- units = dict(K=1, M=kilo, G=kilo**2)
+ units = dict(K=1, M=kilo, G=kilo ** 2)
try:
- size = int(units[text[-1]]*float(text[:-1]))
+ size = int(units[text[-1]] * float(text[:-1]))
except (KeyError, ValueError):
raise ValueError(
"Invalid literal for size give: %s (type %s) should be "
"alike '10G', '500M', '50K'." % (text, type(text))
)
return size
+# if a rmtree operation fails in rm_subdirs, wait for this much time (in secs),
+# then retry once. if it still fails, raise the exception
+RM_SUBDIRS_RETRY_TIME = 0.1
+
def rm_subdirs(path, onerror=None):
"""Remove all subdirectories in this path.
+
+ The directory indicated by `path` is left in place, and its subdirectories
+ are erased.
If onerror is set, it is called to handle the error with arguments (func,
path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
path is the argument to that function that caused it to fail; and
- exc_info is a tuple returned by sys.exc_info(). If ignore_errors
- is false and onerror is None, an exception is raised.
+ exc_info is a tuple returned by sys.exc_info(). If onerror is None,
+ an exception is raised.
"""
# NOTE this code is adapted from the one in shutil.rmtree, and is
@@ -59,9 +67,25 @@ def rm_subdirs(path, onerror=None):
try:
names = os.listdir(path)
except os.error, err:
- onerror(os.listdir, path, sys.exc_info())
-
+ if onerror is not None:
+ onerror(os.listdir, path, sys.exc_info())
+ else:
+ raise
+
for name in names:
fullname = os.path.join(path, name)
if os.path.isdir(fullname):
- shutil.rmtree(fullname, False, onerror)
+ if onerror is not None:
+ shutil.rmtree(fullname, False, onerror)
+ else:
+ # allow the rmtree to fail once, wait and re-try.
+ # if the error is raised again, fail
+ err_count = 0
+ while True:
+ try:
+ shutil.rmtree(fullname, False, None)
+ break
+ except os.error, err:
+ if err_count > 0: raise
+ err_count += 1
+ time.sleep(RM_SUBDIRS_RETRY_TIME)
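The retry logic added above guards against transient rmtree failures, e.g. on network filesystems where a concurrent reader can briefly pin a file. A minimal standalone sketch of the same retry-once pattern (the wrapper name is illustrative, not joblib API):

    import shutil
    import time

    RETRY_DELAY = 0.1  # mirrors RM_SUBDIRS_RETRY_TIME above

    def rmtree_with_retry(path):
        # An rmtree that tolerates one transient failure: wait briefly,
        # retry once, and let a second failure propagate to the caller.
        try:
            shutil.rmtree(path)
        except OSError:
            time.sleep(RETRY_DELAY)
            shutil.rmtree(path)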
…/joblib/func_inspect.py
@@ -194,7 +194,6 @@ def filter_args(func, ignore_lst, *args, **kwargs):
for k, v in kwargs.iteritems())
)
)
-
varkwargs = dict()
…/joblib/memory.py
@@ -53,9 +53,6 @@
# TODO: Same remark for the logger, and probably use the Python logging
# mechanism.
-# TODO: Track history as objects are called, to be able to garbage
-# collect them.
-
def extract_first_line(func_code):
""" Extract the first line information from the function code
@@ -76,7 +73,7 @@ class JobLibCollisionWarning(UserWarning):
################################################################################
-# class `Memory`
+# class `MemorizedFunc`
################################################################################
class MemorizedFunc(Logger):
""" Callable object decorating a function for caching its return value
@@ -170,7 +167,14 @@ def __call__(self, *args, **kwargs):
return self.call(*args, **kwargs)
else:
try:
- return self.load_output(output_dir)
+ t0 = time.time()
+ out = self.load_output(output_dir)
+ if self._verbose > 4:
+ t = time.time() - t0
+ _, name = get_func_name(self.func)
+ msg = '%s cache loaded - %s' % (name, format_time(t))
+ print max(0, (80 - len(msg)))*'_' + msg
+ return out
except Exception:
# XXX: Should use an exception logger
self.warn(
@@ -182,6 +186,15 @@ def __call__(self, *args, **kwargs):
shutil.rmtree(output_dir, ignore_errors=True)
return self.call(*args, **kwargs)
+
+ def __reduce__(self):
+ """ We don't store the timestamp when pickling, to avoid the hash
+ depending from it.
+ In addition, when unpickling, we run the __init__
+ """
+ return (self.__class__, (self.func, self.cachedir, self.ignore,
+ self.save_npy, self.mmap_mode, self._verbose))
+
#-------------------------------------------------------------------------
# Private interface
#-------------------------------------------------------------------------
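The new __reduce__ keeps the volatile timestamp out of the pickle by making unpickling re-run __init__ with the stored constructor arguments, so the hash of a pickled MemorizedFunc stays stable. A minimal sketch of that pattern, with illustrative class and attribute names:

    import pickle
    import time

    class Stamped(object):
        def __init__(self, value):
            self.value = value
            self.timestamp = time.time()  # volatile; kept out of the pickle

        def __reduce__(self):
            # Pickle as (callable, args): unpickling calls
            # Stamped(self.value), so __init__ runs again and a fresh
            # timestamp is assigned.
            return (self.__class__, (self.value,))

    s = pickle.loads(pickle.dumps(Stamped(42)))
    assert s.value == 42  # the value survives; the timestamp is regenerated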
@@ -451,7 +464,7 @@ class Memory(Logger):
All values are cached on the filesystem, in a deep directory
structure.
- see :ref:`memory`
+ see :ref:`memory_reference`
"""
#-------------------------------------------------------------------------
# Public interface
…/joblib/parallel.py
@@ -1,10 +1,11 @@
"""
-Helpers for embarassingly parallel code.
+Helpers for embarrassingly parallel code.
"""
# Author: Gael Varoquaux < gael dot varoquaux at normalesup dot org >
# Copyright: 2010, Gael Varoquaux
# License: BSD 3 clause
+import os
import sys
import functools
import time
@@ -15,10 +16,15 @@
except:
import pickle
-try:
- import multiprocessing
-except ImportError:
- multiprocessing = None
+# Obtain possible configuration from the environment, assuming 1 (on)
+# by default, upon 0 set to None. Should instructively fail if some non
+# 0/1 value is set.
+multiprocessing = int(os.environ.get('JOBLIB_MULTIPROCESSING', 1)) or None
+if multiprocessing:
+ try:
+ import multiprocessing
+ except ImportError:
+ multiprocessing = None
from .format_stack import format_exc, format_outer_frames
from .logger import Logger, short_format_time
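The new JOBLIB_MULTIPROCESSING variable is a plain on/off switch: '1' (the default) allows the multiprocessing import, '0' disables it, and any other value fails loudly in int(). A usage sketch; the import path is the vendored one used elsewhere in this diff, and the toggle must be set before joblib is first imported since it is read only once:

    import os

    # Force the sequential code path: '0' maps to None in the check above.
    os.environ['JOBLIB_MULTIPROCESSING'] = '0'
    from scikits.learn.externals.joblib import Parallel, delayed

    # Despite n_jobs=2, everything now runs in this single process.
    print(Parallel(n_jobs=2)(delayed(abs)(-i) for i in range(3)))  # [0, 1, 2]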
@@ -151,18 +157,18 @@ class Parallel(Logger):
output is sent to stdout.
pre_dispatch: {'all', integer, or expression, as in '3*n_jobs'}
The amount of jobs to be pre-dispatched. Default is 'all',
- but it may be memory consuming, for instance if each job
+ but it may be memory consuming, for instance if each job
involves a lot of a data.
-
+
Notes
-----
This object uses the multiprocessing module to compute in
parallel the application of a function to many different
- arguments. The main functionnality it brings in addition to
+ arguments. The main functionality it brings in addition to
using the raw multiprocessing API are (see examples for details):
- * More readable code, in particular since it avoids
+ * More readable code, in particular since it avoids
constructing list of arguments.
* Easier debuging:
@@ -188,7 +194,7 @@ class Parallel(Logger):
Reshaping the output when the function has several return
values:
-
+
>>> from math import modf
>>> from scikits.learn.externals.joblib import Parallel, delayed
>>> r = Parallel(n_jobs=1)(delayed(modf)(i/2.) for i in range(10))
@@ -201,17 +207,17 @@ class Parallel(Logger):
The progress meter::
>>> from time import sleep
- >>> from scikits.learn.externals.joblib import Parallel, delayed
+ >>> from scikits.learn.joblib import Parallel, delayed
>>> r = Parallel(n_jobs=2, verbose=1)(delayed(sleep)(.1) for _ in range(10)) #doctest: +SKIP
[Parallel(n_jobs=2)]: Done 1 out of 10 |elapsed: 0.1s remaining: 0.9s
[Parallel(n_jobs=2)]: Done 3 out of 10 |elapsed: 0.2s remaining: 0.5s
[Parallel(n_jobs=2)]: Done 5 out of 10 |elapsed: 0.3s remaining: 0.3s
[Parallel(n_jobs=2)]: Done 7 out of 10 |elapsed: 0.4s remaining: 0.2s
[Parallel(n_jobs=2)]: Done 9 out of 10 |elapsed: 0.5s remaining: 0.1s
- Traceback example, note how the ligne of the error is indicated
+ Traceback example, note how the line of the error is indicated
as well as the values of the parameter passed to the function that
- triggered the exception, eventhough the traceback happens in the
+ triggered the exception, even though the traceback happens in the
child process::
>>> from string import atoi
@@ -246,15 +252,15 @@ class Parallel(Logger):
number of iterations reported is underestimated::
>>> from math import sqrt
- >>> from scikits.learn.externals.joblib import Parallel, delayed
+ >>> from scikits.learn.joblib import Parallel, delayed
>>> def producer():
... for i in range(6):
... print 'Produced %s' % i
... yield i
>>> out = Parallel(n_jobs=2, verbose=100, pre_dispatch='1.5*n_jobs')(
- ... delayed(sqrt)(i) for i in producer()) #doctest: +ELLIPSIS
+ ... delayed(sqrt)(i) for i in producer()) #doctest: +SKIP
Produced 0
Produced 1
Produced 2
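This producer doctest is most likely the heisenbug from the commit message: with two workers the 'Produced ...' lines interleave nondeterministically, so the marker changes from +ELLIPSIS to +SKIP. The pre_dispatch argument it demonstrates bounds how far ahead of the workers the input generator is consumed. A usage sketch, with the import path of a standalone joblib (the vendored path differs):

    from math import sqrt
    from joblib import Parallel, delayed

    # At most ~1.5 * n_jobs tasks are drawn from the generator at a time,
    # so a very long input stream is never fully materialized in memory.
    results = Parallel(n_jobs=2, pre_dispatch='1.5*n_jobs')(
        delayed(sqrt)(i) for i in range(100000))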
…/joblib/test/test_format_stack.py
@@ -1,28 +1,22 @@
"""
-Test the format_stack module.
+Unit tests for the stack formatting utilities
"""
# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
# Copyright (c) 2010 Gael Varoquaux
# License: BSD Style, 3 clauses.
+import nose
+
from ..format_stack import safe_repr
-import nose
################################################################################
-class Nasty(object):
+
+class Vicious(object):
def __repr__(self):
raise ValueError
- __str__ = __repr__
-
-################################################################################
-# Test safe_repr
def test_safe_repr():
- """ Smoke test safe_repr on a nasty class.
- """
- nasty = Nasty()
- safe_repr(nasty)
-
+ safe_repr(Vicious())
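The rewritten test drives safe_repr with an object whose __repr__ raises. A minimal sketch of such a guarded repr, showing the general pattern rather than joblib's exact implementation:

    def fallback_repr(obj):
        # A repr() that never raises: fall back to a type-based
        # description when the object's own __repr__ blows up.
        try:
            return repr(obj)
        except Exception:
            return '<%s instance (repr failed)>' % type(obj).__name__

    class Vicious(object):
        def __repr__(self):
            raise ValueError

    print(fallback_repr(Vicious()))  # <Vicious instance (repr failed)>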
…/joblib/test/test_func_inspect.py
@@ -8,8 +8,9 @@
import nose
import tempfile
+import functools
-from ..func_inspect import filter_args, get_func_name
+from ..func_inspect import filter_args, get_func_name, get_func_code
from ..memory import Memory
################################################################################
@@ -56,6 +57,7 @@ def test_filter_args():
yield nose.tools.assert_equal, filter_args(f, ['y'], 0, y=1), {'x': 0}
yield nose.tools.assert_equal, filter_args(f, ['x', 'y'], 0), {}
yield nose.tools.assert_equal, filter_args(f, [], 0, y=1), {'x':0, 'y':1}
+ yield nose.tools.assert_equal, filter_args(f, ['y'], x=2, y=1), {'x':2}
yield nose.tools.assert_equal, filter_args(i, [], 2), {'x': 2}
yield nose.tools.assert_equal, filter_args(f2, [], x=1), {'x': 1}
@@ -77,23 +79,54 @@ def test_filter_varargs():
yield nose.tools.assert_equal, filter_args(h, ['*'], 1, 2, 25, ee=2), \
{'x': 1, 'y': 2, '**':{'ee':2}}
+
def test_filter_kwargs():
nose.tools.assert_equal(filter_args(k, [], 1, 2, ee=2),
{'*': [1, 2], '**':{'ee':2}})
nose.tools.assert_equal(filter_args(k, [], 3, 4),
{'*': [3, 4], '**':{}})
-def test_tmp():
+
+def test_filter_args_2():
nose.tools.assert_equal(filter_args(j, [], 1, 2, ee=2),
{'x': 1, 'y': 2, '**':{'ee':2}})
+ nose.tools.assert_raises(ValueError, filter_args, f, 'a', None)
+ # Check that we capture an undefined argument
+ nose.tools.assert_raises(ValueError, filter_args, f, ['a'], None)
+ ff = functools.partial(f, 1)
+ # filter_args has to special-case partial
+ nose.tools.assert_equal(filter_args(ff, [], 1),
+ {'*': [1], '**':{}})
+ nose.tools.assert_equal(filter_args(ff, ['y'], 1),
+ {'*': [1], '**':{}})
+
def test_func_name():
yield nose.tools.assert_equal, 'f', get_func_name(f)[1]
# Check that we are not confused by the decoration
yield nose.tools.assert_equal, 'g', get_func_name(g)[1]
+def test_func_inspect_errors():
+ """ Check that func_inspect is robust and will work on weird objects
+ """
+ nose.tools.assert_equal(get_func_name('a'.lower)[-1], 'lower')
+ nose.tools.assert_equal(get_func_code('a'.lower)[1:], (None, -1))
+ ff = lambda x: x
+ nose.tools.assert_equal(get_func_name(ff, win_characters=False)[-1],
+ '<lambda>')
+ nose.tools.assert_equal(get_func_code(ff)[1],
+ __file__.replace('.pyc', '.py'))
+ # Simulate a function defined in __main__
+ ff.__module__ = '__main__'
+ nose.tools.assert_equal(get_func_name(ff, win_characters=False)[-1],
+ '<lambda>')
+ nose.tools.assert_equal(get_func_code(ff)[1],
+ __file__.replace('.pyc', '.py'))
+
+
+
def test_bound_methods():
""" Make sure that calling the same method on two different instances
of the same class does resolv to different signatures.
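For context on the assertions above: filter_args normalizes a call into a canonical argument dict and drops the ignored names, which keeps cache keys stable across positional and keyword call styles. A usage sketch restating two of the tested cases (the helper f is assumed to look like def f(x, y=0); signature and import path as in this 2011 vintage of joblib):

    from joblib.func_inspect import filter_args

    def f(x, y=0):
        pass

    # Positional and keyword calls normalize to the same dict,
    # minus any ignored argument names.
    assert filter_args(f, ['y'], 0, y=1) == {'x': 0}
    assert filter_args(f, [], 0, y=1) == {'x': 0, 'y': 1}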

4 comments on commit c88b5bc

ogrisel (Owner) replied Apr 29, 2011:
Thanks :)

mblondel (Owner) replied Apr 29, 2011

GaelVaroquaux (Owner) replied Apr 29, 2011:
I like the fact that I have to do manual updates: it separates breakage in joblib from breakage in scikits.learn ;)

mblondel (Owner) replied Apr 29, 2011
