Skip to content

Commit

Permalink
Merge pull request #255 from us:todo-free-disk-size
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 241555248
  • Loading branch information
Copybara-Service committed Apr 2, 2019
2 parents a81a42f + 554a2b3 commit 23df840
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 0 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
'numpy',
'promise',
'protobuf>=3.6.1',
'psutil',
'requests',
'six',
'tensorflow-metadata',
Expand Down
7 changes: 7 additions & 0 deletions tensorflow_datasets/core/dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ def download_and_prepare(self, download_dir=None, download_config=None):
Defaults to "~/tensorflow-datasets/downloads".
download_config: `tfds.download.DownloadConfig`, further configuration for
downloading and preparing dataset.
Raises:
IOError: if there is not enough disk space available.
"""

download_config = download_config or download.DownloadConfig()
Expand All @@ -224,6 +227,10 @@ def download_and_prepare(self, download_dir=None, download_config=None):
"please update the version number.".format(self.name, self._data_dir,
self.info.version))
logging.info("Generating dataset %s (%s)", self.name, self._data_dir)
if not utils.has_sufficient_disk_space(
self.info.size_in_bytes, directory=self._data_dir_root):
raise IOError("Not enough disk space. Needed: %s" %
units.size_str(self.info.size_in_bytes))
self._log_download_bytes()

# Create a tmp dir and rename to self._data_dir on successful exit.
Expand Down
9 changes: 9 additions & 0 deletions tensorflow_datasets/core/utils/py_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import sys
import uuid

import psutil
import six
import tensorflow as tf
from tensorflow_datasets.core import constants
Expand Down Expand Up @@ -275,3 +276,11 @@ def rgetattr(obj, attr, *args):
def _getattr(obj, attr):
return getattr(obj, attr, *args)
return functools.reduce(_getattr, [obj] + attr.split("."))


def has_sufficient_disk_space(needed_bytes, directory="."):
  """Checks whether `directory` has strictly more free space than requested.

  Args:
    needed_bytes: number of bytes that must be available.
    directory: path whose disk usage is queried; it is made absolute before
      the lookup. Defaults to the current directory.

  Returns:
    True if the free space reported for `directory` is strictly greater than
    `needed_bytes`, or if the disk usage lookup raised an `OSError`. False
    otherwise.
  """
  try:
    usage = psutil.disk_usage(os.path.abspath(directory))
  except OSError:
    # Best effort: when the usage cannot be determined, do not block the
    # caller; report the space as sufficient.
    return True
  available = usage.free
  return needed_bytes < available

0 comments on commit 23df840

Please sign in to comment.