Skip to content

Commit

Permalink
Clean up CCI cache handling (#1238)
Browse files Browse the repository at this point in the history
1. Add `generate_cachekey` and `fetch_cachekey` commands that abstract away how the cache key is generated.
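2. Fix the daily cache key by switching to `date +%D`. The previous format, `"+%Y-%d"`, omitted the month, so the same key recurred on the same day of every month instead of being unique per day.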
3. Remove the conda environment cache for simplicity (it is no longer the bottleneck, and having a cached conda env often causes more trouble when the macOS test is enabled).
  • Loading branch information
mthrok committed Mar 11, 2021
1 parent 34ccda8 commit 735d5e7
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 313 deletions.
204 changes: 48 additions & 156 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,21 @@ commands:
if [[ "${PYTHON_VERSION}" = *3.9* ]]; then
echo "export CONDA_CHANNEL_FLAGS=-c=conda-forge" >> ${BASH_ENV}
fi
# Reusable command: writes the current date into a `.cachekey` file and then
# persists that file to the workflow workspace. Cache steps elsewhere in this
# config reference the file via `{{ checksum ".cachekey" }}`.
generate_cachekey:
description: "Generate .cachekey file that changes on daily basis"
steps:
- run:
name: Generate CCI cache key
# `%D` is the date(1) shorthand for `%m/%d/%y`, so the file content (and
# therefore its checksum) differs for every calendar day.
command: echo "$(date "+%D")" > .cachekey
# Publish only the key file, relative to the working directory.
- persist_to_workspace:
root: .
paths:
- .cachekey
# Reusable command: attaches the workflow workspace at the current directory.
# Per its description, this is how a job obtains the `.cachekey` file that
# `generate_cachekey` persisted.
# NOTE(review): presumably the job that ran `generate_cachekey` must be an
# upstream requirement in the workflow — confirm against the workflow section.
fetch_cachekey:
description: "Fetch the .cachekey file that is generated by generate_cachekey job"
steps:
- attach_workspace:
at: .

binary_common: &binary_common
parameters:
Expand Down Expand Up @@ -356,64 +371,34 @@ jobs:
- checkout
- designate_upload_channel
- load_conda_channel_flags
- run:
name: Generate cache key
# This will refresh cache on Sundays, nightly build should generate new cache.
command: |
echo "$(date +"Year-Week %Y-%U")" > .circleci-weekly
echo "$(date +"Year-Date %Y-%d")" > .circleci-daily-linux
- persist_to_workspace:
root: .
paths:
- .circleci-daily-linux
- generate_cachekey
- restore_cache:
keys:

- data-linux-v1-{{ checksum ".circleci-daily-linux" }}
- v1-linux-cache-index-{{ checksum ".cachekey" }}

- run:
name: Exit if cache exists
name: Generate cache
no_output_timeout: 30m
command: |
FILE=.data/cache_status_file.json
if test -f "$FILE"; then
cat $FILE
circleci step halt
else
echo "$FILE does not exists"
if [ ! -f .data/cache_status_file.json ] ; then
.circleci/unittest/linux/scripts/setup_env.sh
.circleci/unittest/linux/scripts/install.sh
.circleci/unittest/linux/scripts/generate_cache.sh
fi
- restore_cache:

keys:
- env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}

- run:
name: Setup
command: .circleci/unittest/linux/scripts/setup_env.sh
cat .data/cache_status_file.json
- save_cache:

key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
key: v1-linux-dataset-{{ checksum ".cachekey" }}

paths:
- conda
- env
- run:
name: Install torchtext
command: .circleci/unittest/linux/scripts/install.sh
- run:
name: Generate daily data Cache
no_output_timeout: 30m
command: |
.circleci/unittest/linux/scripts/generate_cache.sh
cat .data/cache_status_file.json
- .data
- save_cache:

key: data-linux-v1-{{ checksum ".circleci-daily-linux" }}
key: v1-linux-cache-index-{{ checksum ".cachekey" }}

paths:
- .data
- run:
name: Post process
command: .circleci/unittest/linux/scripts/post_process.sh
- .data/cache_status_file.json

unittest_linux:
<<: *binary_common
Expand All @@ -424,40 +409,18 @@ jobs:
- checkout
- designate_upload_channel
- load_conda_channel_flags
- attach_workspace:
at: .
- run:
name: Generate cache key
# This will refresh cache on Sundays, nightly build should generate new cache.
command: |
echo "$(date +"Year-Week %Y-%U")" > .circleci-weekly
- restore_cache:

keys:
- env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}

- fetch_cachekey
- run:
name: Setup
command: .circleci/unittest/linux/scripts/setup_env.sh
- save_cache:

key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}

paths:
- conda
- env
- run:
name: Install torchtext
command: .circleci/unittest/linux/scripts/install.sh
- restore_cache:
keys:

- data-linux-v1-{{ checksum ".circleci-daily-linux" }}

- restore_cache:
keys:

- data-vector-linux-v1-{{ checksum ".circleci-daily-linux" }}
- v1-linux-dataset-vector-{{ checksum ".cachekey" }}
- v1-linux-dataset-{{ checksum ".cachekey" }}

- run:
name: Run tests
Expand All @@ -466,7 +429,7 @@ jobs:
command: .circleci/unittest/linux/scripts/run_test.sh
- save_cache:

key: data-vector-linux-v1-{{ checksum ".circleci-daily-linux" }}
key: v1-linux-dataset-vector-{{ checksum ".cachekey" }}

paths:
- .vector_cache
Expand All @@ -485,67 +448,34 @@ jobs:
- checkout
- designate_upload_channel
- load_conda_channel_flags
- run:
name: Generate cache key
# This will refresh cache on Sundays, nightly build should generate new cache.
command: |
echo "$(date +"Year-Week %Y-%U")" > .circleci-weekly
echo "$(date +"Year-Date %Y-%d")" > .circleci-daily-win
- persist_to_workspace:
root: .
paths:
- .circleci-daily-win
- generate_cachekey
- restore_cache:
keys:

- data-windows-v1-{{ checksum ".circleci-daily-win" }}

- run:
name: Exit if cache exists
command: |
FILE=.data/cache_status_file.json
if test -f "$FILE"; then
cat $FILE
circleci step halt
else
echo "$FILE does not exists"
fi
- restore_cache:

keys:
- env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}

- run:
name: Setup
command: .circleci/unittest/windows/scripts/setup_env.sh
- save_cache:

key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
- v1-windows-cache-index-{{ checksum ".cachekey" }}

paths:
- conda
- env
- run:
name: Install torchtext
command: .circleci/unittest/windows/scripts/install.sh
- run:
name: Generate daily data Cache
no_output_timeout: 30m
command: |
.circleci/unittest/windows/scripts/generate_cache.sh
if [ ! -f .data/cache_status_file.json ] ; then
.circleci/unittest/windows/scripts/setup_env.sh
.circleci/unittest/windows/scripts/install.sh
.circleci/unittest/windows/scripts/generate_cache.sh
fi
cat .data/cache_status_file.json
- save_cache:

key: data-windows-v1-{{ checksum ".circleci-daily-win" }}
key: v1-windows-dataset-{{ checksum ".cachekey" }}

paths:
- .data
- run:
name: Post process
command: .circleci/unittest/windows/scripts/post_process.sh

- save_cache:

key: v1-windows-cache-index-{{ checksum ".cachekey" }}

paths:
- .data/cache_status_file.json

unittest_windows:
<<: *binary_common
Expand All @@ -555,40 +485,18 @@ jobs:
- checkout
- designate_upload_channel
- load_conda_channel_flags
- attach_workspace:
at: .
- run:
name: Generate cache key
# This will refresh cache on Sundays, nightly build should generate new cache.
command: |
echo "$(date +"Year-Week %Y-%U")" > .circleci-weekly
- restore_cache:

keys:
- env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}

- fetch_cachekey
- run:
name: Setup
command: .circleci/unittest/windows/scripts/setup_env.sh
- save_cache:

key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}

paths:
- conda
- env
- run:
name: Install torchtext
command: .circleci/unittest/windows/scripts/install.sh
- restore_cache:
keys:

- data-windows-v1-{{ checksum ".circleci-daily-win" }}

- restore_cache:
keys:

- data-vector-windows-v1-{{ checksum ".circleci-daily-win" }}
- v1-windows-dataset-vector-{{ checksum ".cachekey" }}
- v1-windows-dataset-{{ checksum ".cachekey" }}


- run:
Expand All @@ -598,7 +506,7 @@ jobs:
command: .circleci/unittest/windows/scripts/run_test.sh
- save_cache:

key: data-vector-windows-v1-{{ checksum ".circleci-daily-win" }}
key: v1-windows-dataset-vector-{{ checksum ".cachekey" }}

paths:
- .vector_cache
Expand All @@ -617,25 +525,9 @@ jobs:
steps:
- checkout
- designate_upload_channel
- run:
name: Generate cache key
# This will refresh cache on Sundays, nightly build should generate new cache.
command: echo "$(date +"Year-Week %Y-%U")" > .circleci-weekly
- restore_cache:

keys:
- env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}

- run:
name: Setup
command: .circleci/unittest/linux/scripts/setup_env.sh
- save_cache:

key: env-v1-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}

paths:
- conda
- env
- run:
name: Run style check
command: .circleci/unittest/linux/scripts/run_style_checks.sh
Expand Down

0 comments on commit 735d5e7

Please sign in to comment.