Skip to content

Commit

Permalink
[WIP] changing default root for datasets (#1361)
Browse files (browse the repository at this point in the history)
  • Loading branch information
parmeet authored Jul 22, 2021
1 parent f7c2985 commit 05cb992
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 26 deletions.
22 changes: 11 additions & 11 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ commands:
steps:
- run:
name: Generate CCI cache key
command:
command: |
echo "$(date "+%D")" > .cachekey
cat .circleci/cached_datasets_list.txt >> .cachekey
- persist_to_workspace:
Expand Down Expand Up @@ -380,24 +380,24 @@ jobs:
name: Generate cache
no_output_timeout: 30m
command: |
if [ ! -f .data/cache_status_file.json ] ; then
if [ ! -f /root/.torchtext/cache/cache_status_file.json ] ; then
.circleci/unittest/linux/scripts/setup_env.sh
.circleci/unittest/linux/scripts/install.sh
.circleci/unittest/linux/scripts/generate_cache.sh
fi
cat .data/cache_status_file.json
cat /root/.torchtext/cache/cache_status_file.json
- save_cache:

key: v1-linux-dataset-{{ checksum ".cachekey" }}

paths:
- .data
- /root/.torchtext/cache
- save_cache:

key: v1-linux-cache-index-{{ checksum ".cachekey" }}

paths:
- .data/cache_status_file.json
- /root/.torchtext/cache/cache_status_file.json

unittest_linux:
<<: *binary_common
Expand Down Expand Up @@ -432,7 +432,7 @@ jobs:

paths:
- .vector_cache
- .data
- /root/.torchtext/cache
- run:
name: Post process
command: .circleci/unittest/linux/scripts/post_process.sh
Expand All @@ -457,24 +457,24 @@ jobs:
name: Generate daily data Cache
no_output_timeout: 30m
command: |
if [ ! -f .data/cache_status_file.json ] ; then
if [ ! -f C:/Users/circleci/.torchtext/cache/cache_status_file.json ] ; then
.circleci/unittest/windows/scripts/setup_env.sh
.circleci/unittest/windows/scripts/install.sh
.circleci/unittest/windows/scripts/generate_cache.sh
fi
cat .data/cache_status_file.json
cat C:/Users/circleci/.torchtext/cache/cache_status_file.json
- save_cache:

key: v1-windows-dataset-{{ checksum ".cachekey" }}

paths:
- .data
- C:/Users/circleci/.torchtext/cache
- save_cache:

key: v1-windows-cache-index-{{ checksum ".cachekey" }}

paths:
- .data/cache_status_file.json
- C:/Users/circleci/.torchtext/cache/cache_status_file.json

unittest_windows:
<<: *binary_common
Expand Down Expand Up @@ -509,7 +509,7 @@ jobs:

paths:
- .vector_cache
- .data
- C:/Users/circleci/.torchtext/cache
- run:
name: Post process
command: .circleci/unittest/windows/scripts/post_process.sh
Expand Down
22 changes: 11 additions & 11 deletions .circleci/config.yml.in
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ commands:
steps:
- run:
name: Generate CCI cache key
command:
command: |
echo "$(date "+%D")" > .cachekey
cat .circleci/cached_datasets_list.txt >> .cachekey
- persist_to_workspace:
Expand Down Expand Up @@ -380,24 +380,24 @@ jobs:
name: Generate cache
no_output_timeout: 30m
command: |
if [ ! -f .data/cache_status_file.json ] ; then
if [ ! -f /root/.torchtext/cache/cache_status_file.json ] ; then
.circleci/unittest/linux/scripts/setup_env.sh
.circleci/unittest/linux/scripts/install.sh
.circleci/unittest/linux/scripts/generate_cache.sh
fi
cat .data/cache_status_file.json
cat /root/.torchtext/cache/cache_status_file.json
- save_cache:
{% raw %}
key: v1-linux-dataset-{{ checksum ".cachekey" }}
{% endraw %}
paths:
- .data
- /root/.torchtext/cache
- save_cache:
{% raw %}
key: v1-linux-cache-index-{{ checksum ".cachekey" }}
{% endraw %}
paths:
- .data/cache_status_file.json
- /root/.torchtext/cache/cache_status_file.json

unittest_linux:
<<: *binary_common
Expand Down Expand Up @@ -432,7 +432,7 @@ jobs:
{% endraw %}
paths:
- .vector_cache
- .data
- /root/.torchtext/cache
- run:
name: Post process
command: .circleci/unittest/linux/scripts/post_process.sh
Expand All @@ -457,24 +457,24 @@ jobs:
name: Generate daily data Cache
no_output_timeout: 30m
command: |
if [ ! -f .data/cache_status_file.json ] ; then
if [ ! -f C:/Users/circleci/.torchtext/cache/cache_status_file.json ] ; then
.circleci/unittest/windows/scripts/setup_env.sh
.circleci/unittest/windows/scripts/install.sh
.circleci/unittest/windows/scripts/generate_cache.sh
fi
cat .data/cache_status_file.json
cat C:/Users/circleci/.torchtext/cache/cache_status_file.json
- save_cache:
{% raw %}
key: v1-windows-dataset-{{ checksum ".cachekey" }}
{% endraw %}
paths:
- .data
- C:/Users/circleci/.torchtext/cache
- save_cache:
{% raw %}
key: v1-windows-cache-index-{{ checksum ".cachekey" }}
{% endraw %}
paths:
- .data/cache_status_file.json
- C:/Users/circleci/.torchtext/cache/cache_status_file.json

unittest_windows:
<<: *binary_common
Expand Down Expand Up @@ -509,7 +509,7 @@ jobs:
{% endraw %}
paths:
- .vector_cache
- .data
- C:/Users/circleci/.torchtext/cache
- run:
name: Post process
command: .circleci/unittest/windows/scripts/post_process.sh
Expand Down
4 changes: 2 additions & 2 deletions test/common/cache_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
import torchtext
from .parameterized_utils import load_params

CACHE_STATUS_FILE = '.data/cache_status_file.json'
CACHE_STATUS_FILE = os.path.join(os.path.expanduser('~/.torchtext/cache'), 'cache_status_file.json')


def check_cache_status():
assert os.path.exists(CACHE_STATUS_FILE), "Cache status file does not exists"
assert os.path.exists(CACHE_STATUS_FILE), "Cache status file [{}] does not exists".format(CACHE_STATUS_FILE)
with open(CACHE_STATUS_FILE, 'r') as f:
missing_datasets = []
cache_status = json.load(f)
Expand Down
4 changes: 2 additions & 2 deletions torchtext/data/datasets_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def _wrap_split_argument_with_fn(fn, splits):
raise ValueError("Internal Error: Given function {} did not adhere to standard signature.".format(fn))

@functools.wraps(fn)
def new_fn(root='.data', split=splits, **kwargs):
def new_fn(root=os.path.expanduser('~/.torchtext/cache'), split=splits, **kwargs):
result = []
for item in _check_default_set(split, splits, fn.__name__):
result.append(fn(root, item, **kwargs))
Expand Down Expand Up @@ -250,7 +250,7 @@ def decorator(func):
raise ValueError("Internal Error: Given function {} did not adhere to standard signature.".format(fn))

@functools.wraps(func)
def wrapper(root='.data', *args, **kwargs):
def wrapper(root=os.path.expanduser('~/.torchtext/cache'), *args, **kwargs):
new_root = os.path.join(root, dataset_name)
if not os.path.exists(new_root):
os.makedirs(new_root)
Expand Down

0 comments on commit 05cb992

Please sign in to comment.