Skip to content

Commit

Permalink
Fix reading of bunch pickles
Browse files Browse the repository at this point in the history
that have been generated with scikit-learn < 0.17
  • Loading branch information
lesteve authored and ogrisel committed Jan 28, 2016
1 parent 887a069 commit 3569e27
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 2 deletions.
12 changes: 10 additions & 2 deletions sklearn/datasets/base.py
Expand Up @@ -57,8 +57,16 @@ def __getattr__(self, key):
except KeyError:
raise AttributeError(key)

def __getstate__(self):
return self.__dict__
def __setstate__(self, state):
# Bunch pickles generated with scikit-learn 0.16.* have an non
# empty __dict__. This causes a surprising behaviour when
# loading these pickles scikit-learn 0.17: reading bunch.key
# uses __dict__ but assigning to bunch.key use __setattr__ and
# only changes bunch['key']. More details can be found at:
# https://github.com/scikit-learn/scikit-learn/issues/6196.
# Overriding __setstate__ to be a noop has the effect of
# ignoring the pickled __dict__
pass


def get_data_home(data_home=None):
Expand Down
21 changes: 21 additions & 0 deletions sklearn/datasets/tests/test_base.py
Expand Up @@ -202,3 +202,24 @@ def test_loads_dumps_bunch():
bunch_from_pkl = loads(dumps(bunch))
bunch_from_pkl.x = "y"
assert_equal(bunch_from_pkl['x'], bunch_from_pkl.x)


def test_bunch_pickle_generated_with_0_16_and_read_with_0_17():
bunch = Bunch(key='original')
# This reproduces a problem when Bunch pickles have been created
# with scikit-learn 0.16 and are read with 0.17. Basically there
# is a suprising behaviour because reading bunch.key uses
# bunch.__dict__ (which is non empty for 0.16 Bunch objects)
# whereas assigning into bunch.key uses bunch.__setattr__. See
# https://github.com/scikit-learn/scikit-learn/issues/6196 for
# more details
bunch.__dict__['key'] = 'set from __dict__'
bunch_from_pkl = loads(dumps(bunch))
# After loading from pickle the __dict__ should have been ignored
assert_equal(bunch_from_pkl.key, 'original')
assert_equal(bunch_from_pkl['key'], 'original')
# Making sure that changing the attr does change the value
# associated with __getitem__ as well
bunch_from_pkl.key = 'changed'
assert_equal(bunch_from_pkl.key, 'changed')
assert_equal(bunch_from_pkl['key'], 'changed')

0 comments on commit 3569e27

Please sign in to comment.