From da9a92a8dddd8d9a07318c83a4f275b4b8797cf0 Mon Sep 17 00:00:00 2001
From: "B. Arman Aksoy"
Date: Thu, 21 Jul 2016 16:59:46 -0400
Subject: [PATCH] override cache dir if PYENSEMBL_CACHE_DIR is set

---
 README.md                   | 19 +++++++++++++++++++
 pyensembl/download_cache.py |  7 ++++++-
 test/test_download_cache.py | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9699997..2c923b0 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,25 @@ human reference data from Ensembl releases 75 and 76.
 Alternatively, you can create the `EnsemblRelease` object from inside a Python
 process and call `ensembl_object.download()` followed by `ensembl_object.index()`.
 
+## Cache Location
+By default, PyEnsembl uses the platform-specific `Cache` folder
+and caches the files into the `pyensembl` sub-directory.
+You can override this default by setting the environment variable `PYENSEMBL_CACHE_DIR`
+to your preferred location for caching:
+
+```sh
+export PYENSEMBL_CACHE_DIR=/custom/cache/dir
+```
+
+or
+
+```python
+import os
+
+os.environ['PYENSEMBL_CACHE_DIR'] = '/custom/cache/dir'
+# ... PyEnsembl API usage (set the variable before creating any PyEnsembl objects)
+```
+
 # Non-Ensembl Data
 
 PyEnsembl also allows arbitrary genomes via the specification
diff --git a/pyensembl/download_cache.py b/pyensembl/download_cache.py
index 1155778..bf50f02 100644
--- a/pyensembl/download_cache.py
+++ b/pyensembl/download_cache.py
@@ -20,6 +20,7 @@
 import datacache
 
 CACHE_BASE_SUBDIR = "pyensembl"
+ENV_KEY = "PYENSEMBL_CACHE_DIR"
 
 def cache_subdirectory(
         reference_name=None,
@@ -111,8 +112,12 @@ def __init__(
             annotation_name=annotation_name,
             annotation_version=annotation_version)
 
+        # datacache uses $PYENSEMBL_CACHE_DIR as the base directory when it
+        # is set and the platform default otherwise; the per-annotation
+        # subdirectory is kept in both cases.
         self._cache_directory_path = datacache.get_data_dir(
-            subdir=self.cache_subdirectory)
+            subdir=self.cache_subdirectory,
+            envkey=ENV_KEY)
 
         self.decompress_on_download = decompress_on_download
         self.copy_local_files_to_cache = copy_local_files_to_cache
diff --git a/test/test_download_cache.py b/test/test_download_cache.py
index 4431dfe..0156f4b 100644
--- a/test/test_download_cache.py
+++ b/test/test_download_cache.py
@@ -7,6 +7,12 @@
     MissingRemoteFile
 )
 
+import os
+import shutil
+import tempfile
+
+from .data import data_path
+
 download_cache = DownloadCache(
     reference_name="__test_reference",
     annotation_name="__test_annotation",
@@ -25,3 +31,32 @@ def test_download_cache_missing_remote_file():
     with assert_raises(MissingRemoteFile):
         download_cache.download_or_copy_if_necessary(
             path_or_url="ftp://NOTAURL.NOTAURL.NOTAURL")
+
+def test_download_cache_custom_location():
+    test_file = "refseq.ucsc.small.gtf"
+    # Use a private scratch directory instead of the shared system temp
+    # directory so that cleaning up cannot remove unrelated files.
+    tmp_dir = tempfile.mkdtemp()
+    previous_cache_dir = os.environ.get('PYENSEMBL_CACHE_DIR')
+    os.environ['PYENSEMBL_CACHE_DIR'] = tmp_dir
+    try:
+        # The cache location is resolved when a DownloadCache is
+        # constructed, so the module-level instance cannot be reused here.
+        custom_cache = DownloadCache(
+            reference_name="__test_reference",
+            annotation_name="__test_annotation",
+            copy_local_files_to_cache=True)
+        custom_cache.download_or_copy_if_necessary(
+            path_or_url=data_path(test_file))
+        cached_files = [
+            name
+            for _, _, names in os.walk(tmp_dir)
+            for name in names
+        ]
+        assert test_file in cached_files
+    finally:
+        if previous_cache_dir is None:
+            del os.environ['PYENSEMBL_CACHE_DIR']
+        else:
+            os.environ['PYENSEMBL_CACHE_DIR'] = previous_cache_dir
+        shutil.rmtree(tmp_dir, ignore_errors=True)