Skip to content

Commit

Permalink
moved file handle saving functionality to SaveLoad base class
Browse files Browse the repository at this point in the history
  • Loading branch information
macks22 committed Feb 10, 2015
1 parent f91b51a commit fcf12fa
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 30 deletions.
21 changes: 0 additions & 21 deletions gensim/corpora/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,6 @@
import logging
import itertools

try:
import cPickle as pickle
except ImportError:
import pickle

from gensim import utils

if sys.version_info[0] >= 3:
Expand Down Expand Up @@ -398,20 +393,4 @@ def from_corpus(corpus, id2word=None):
(result, result.num_docs, result.num_pos))
return result

def save(self, fname_or_handle):
    """
    Save the object to file (also see `load`).

    `fname_or_handle` is either a string specifying the file name to
    save to, or an open file-like object which can be written to.

    A file name is opened in binary mode and closed again once the
    pickle has been written. A handle supplied by the caller is written
    to as-is and left open.

    """
    logger.info("saving %s object", self.__class__.__name__)
    if hasattr(fname_or_handle, 'write'):
        # Decide by looking at the argument rather than by catching
        # TypeError from `pickle.dump`: a TypeError raised while pickling
        # (unpicklable attribute, handle opened in text mode) must reach the
        # caller unchanged instead of triggering `open()` on a file handle.
        pickle.dump(self, fname_or_handle)
    else:
        with open(fname_or_handle, 'wb') as fd:
            pickle.dump(self, fd)


#endclass Dictionary
50 changes: 41 additions & 9 deletions gensim/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,19 +228,22 @@ def load(cls, fname, mmap=None):
setattr(obj, attrib, None)
return obj

def save(self, fname, separately=None, sep_limit=10 * 1024**2, ignore=frozenset()):
def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, ignore=frozenset()):
"""
Save the object to file (also see `load`).
If `separately` is None, automatically detect large numpy/scipy.sparse arrays
in the object being stored, and store them into separate files. This avoids
pickle memory errors and allows mmap'ing large arrays back on load efficiently.
If `separately` is None, automatically detect large
numpy/scipy.sparse arrays in the object being stored, and store
them into separate files. This avoids pickle memory errors and
allows mmap'ing large arrays back on load efficiently.
You can also set `separately` manually, in which case it must be a list of attribute
names to be stored in separate files. The automatic check is not performed in this case.
You can also set `separately` manually, in which case it must be
a list of attribute names to be stored in separate files. The
automatic check is not performed in this case.
`ignore` is a set of attribute names to *not* serialize (file handles, caches etc). On
subsequent load() these attributes will be set to None.
`ignore` is a set of attribute names to *not* serialize (file
handles, caches etc). On subsequent load() these attributes will
be set to None.
"""
logger.info("saving %s object under %s, separately %s" % (self.__class__.__name__, fname, separately))
Expand Down Expand Up @@ -290,6 +293,35 @@ def save(self, fname, separately=None, sep_limit=10 * 1024**2, ignore=frozenset(
# restore the attributes
for attrib, val in iteritems(tmp):
setattr(self, attrib, val)

def save(self, fname_or_handle, separately=None, sep_limit=10 * 1024**2, ignore=frozenset()):
    """
    Save the object to file (also see `load`).

    `fname_or_handle` is either a string specifying the file name to
    save to, or an open file-like object which can be written to. If
    the object is a file handle, no special array handling will be
    performed; all attributes will be saved to the same file.

    If `separately` is None, automatically detect large
    numpy/scipy.sparse arrays in the object being stored, and store
    them into separate files. This avoids pickle memory errors and
    allows mmap'ing large arrays back on load efficiently.

    You can also set `separately` manually, in which case it must be
    a list of attribute names to be stored in separate files. The
    automatic check is not performed in this case.

    `ignore` is a set of attribute names to *not* serialize (file
    handles, caches etc). On subsequent load() these attributes will
    be set to None.

    `separately`, `sep_limit` and `ignore` only take effect when a file
    name is given; they are forwarded unchanged to `_smart_save`.

    """
    if hasattr(fname_or_handle, 'write'):
        # `pickle` in this module is the helper function defined further
        # down, not the standard library module, so the dump has to go
        # through the `_pickle` alias.
        _pickle.dump(self, fname_or_handle, protocol=_pickle.HIGHEST_PROTOCOL)
        logger.info("saved %s object", self.__class__.__name__)
    else:
        # Not a writable handle: treat the argument as a file name. Checking
        # for `write` up front (instead of catching TypeError) keeps genuine
        # pickling TypeErrors from being mistaken for "not a handle".
        self._smart_save(fname_or_handle, separately, sep_limit, ignore)
#endclass SaveLoad


Expand Down Expand Up @@ -715,7 +747,7 @@ def smart_open(fname, mode='rb'):
return open(fname, mode)


def pickle(obj, fname, protocol=-1):
def pickle(obj, fname, protocol=_pickle.HIGHEST_PROTOCOL):
    """Serialize `obj` with pickle `protocol` and write it to the file `fname`."""
    # Binary mode ('b') is required on Windows.
    with smart_open(fname, 'wb') as sink:
        _pickle.dump(obj, sink, protocol=protocol)
Expand Down

0 comments on commit fcf12fa

Please sign in to comment.