From 88e010e59aee40c8e100ffa5243ae6a094b21cf5 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Wed, 20 Dec 2017 16:07:21 -0500 Subject: [PATCH 01/20] use dataset_full_paths --- solvebio/cli/data.py | 23 +++++------------------ solvebio/cli/main.py | 39 ++++----------------------------------- solvebio/client.py | 1 + 3 files changed, 10 insertions(+), 53 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index 5506bbb6..65634f32 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -26,11 +26,6 @@ def create_dataset(args): * capacity """ - if args.vault: - vault_name = args.vault - else: - vault_name = Vault.get_personal_vault().name - # Accept a template_id or a template_file if args.template_id: # Validate the template ID @@ -62,7 +57,7 @@ def create_dataset(args): print("A new dataset template was created with id: {0}".format(tpl.id)) else: print("Creating a new dataset {0} without a template." - .format(args.dataset_name)) + .format(args.dataset_full_path)) tpl = None fields = [] entity_type = None @@ -70,14 +65,14 @@ def create_dataset(args): if tpl: print("Creating new dataset {0} using the template '{1}'." - .format(args.dataset_name, tpl.name)) + .format(args.dataset_full_path, tpl.name)) fields = tpl.fields entity_type = tpl.entity_type # include template used to create description = 'Created with dataset template: {0}'.format(str(tpl.id)) return solvebio.Dataset.get_or_create_by_full_path( - ':'.join([vault_name, os.path.join(args.path, args.dataset_name)]), + args.dataset_full_path, capacity=args.capacity, entity_type=entity_type, fields=fields, @@ -278,25 +273,17 @@ def import_file(args): if not solvebio.api_key: solvebio.login() - if args.vault: - vault_name = args.vault - else: - vault_name = Vault.get_personal_vault().name - # Ensure the dataset exists. Create if necessary. if args.create_dataset: dataset = create_dataset(args) else: try: - full_path = solvebio.Dataset.make_full_path(vault_name, - args.path, - args.dataset_name) - dataset = solvebio.Dataset.get_by_full_path(full_path) + dataset = solvebio.Dataset.get_by_full_path(args.dataset_full_path) except solvebio.SolveError as e: if e.status_code != 404: raise e - print("Dataset not found: {0}".format(args.dataset_name)) + print("Dataset not found: {0}".format(args.dataset_full_path)) print("Tip: use the --create-dataset flag " "to create one from a template") sys.exit(1) diff --git a/solvebio/cli/main.py b/solvebio/cli/main.py index e367965e..171284ca 100644 --- a/solvebio/cli/main.py +++ b/solvebio/cli/main.py @@ -55,19 +55,6 @@ class SolveArgumentParser(argparse.ArgumentParser): 'action': 'store_true', 'help': 'Create the vault if it doesn\'t exist', }, - { - 'flags': '--vault', - 'help': 'The name of the vault to use when ' - 'creating a new dataset (via --create-dataset), ' - 'defaults to your personal vault', - }, - { - 'flags': '--path', - 'default': '/', - 'help': 'The path in the vault where the dataset should ' - 'be created when creating a new dataset' - '(via --create-dataset), defaults to "/"', - }, { 'flags': '--template-id', 'help': 'The template ID used when ' @@ -103,8 +90,8 @@ class SolveArgumentParser(argparse.ArgumentParser): 'Options are "append" (default) or "overwrite".' }, { - 'name': 'dataset_name', - 'help': 'The name of the dataset' + 'name': 'dataset_full_path', + 'help': 'The full path to the dataset in the following format: ::' # noqa }, { 'name': 'file', @@ -122,19 +109,6 @@ class SolveArgumentParser(argparse.ArgumentParser): 'action': 'store_true', 'help': 'Create the vault if it doesn\'t exist', }, - { - 'flags': '--vault', - 'help': 'The name of the vault to use when ' - 'creating a new dataset (via --create-dataset), ' - 'defaults to your personal vault', - }, - { - 'flags': '--path', - 'default': '/', - 'help': 'The path in the vault where the dataset should ' - 'be created when creating a new dataset ' - '(via --create-dataset), defaults to "/"', - }, { 'flags': '--template-id', 'help': 'The template ID used when ' @@ -145,11 +119,6 @@ class SolveArgumentParser(argparse.ArgumentParser): 'help': 'A local template file to be used when ' 'creating a new dataset (via --create-dataset)', }, - { - 'flags': '--genome-build', - 'help': 'If the dataset template is genomic, provide a ' - 'genome build for your data (i.e. GRCh37)' - }, { 'flags': '--capacity', 'default': 'small', @@ -158,8 +127,8 @@ class SolveArgumentParser(argparse.ArgumentParser): 'medium (<500M), large (>=500M)' }, { - 'name': 'dataset_name', - 'help': 'The name of the dataset' + 'name': 'dataset_full_path', + 'help': 'The full path to the dataset in the following format: ::' # noqa } ] }, diff --git a/solvebio/client.py b/solvebio/client.py index 0f3d1482..9945b945 100644 --- a/solvebio/client.py +++ b/solvebio/client.py @@ -275,4 +275,5 @@ def _log_raw_request(self, method, url, **kwargs): def __repr__(self): return ''.format(self._host, self._auth.token) + client = SolveClient(include_resources=False) From c30974a399279e3040b0e5e5f8a57e9ea297307f Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Wed, 20 Dec 2017 16:29:00 -0500 Subject: [PATCH 02/20] fix test --- solvebio/test/test_shortcuts.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/solvebio/test/test_shortcuts.py b/solvebio/test/test_shortcuts.py index 04321fec..30639038 100644 --- a/solvebio/test/test_shortcuts.py +++ b/solvebio/test/test_shortcuts.py @@ -34,11 +34,8 @@ def test_create_dataset(self, DatasetCreate, ObjectAll, VaultAll): DatasetCreate.side_effect = fake_dataset_create ObjectAll.side_effect = fake_object_all VaultAll.side_effect = fake_vault_all - - args = ['create-dataset', 'test-dataset', - '--capacity', 'small', - '--vault', 'test', - '--path', '/'] # noqa + args = ['create-dataset', 'solvebio:test_vault:/test-dataset', + '--capacity', 'small'] # noqa ds = main.main(args) self.assertEqual(ds.name, 'test-dataset') self.assertEqual(ds.path, '/test-dataset') @@ -68,11 +65,9 @@ def test_create_dataset_upload_template(self, TmplCreate, template_path = os.path.join(os.path.dirname(__file__), "data/template.json") - args = ['create-dataset', 'test-dataset', + args = ['create-dataset', 'solvebio:test_vault:/test-dataset', '--template-file', template_path, - '--capacity', 'medium', - '--vault', 'test', - '--path', '/'] # noqa + '--capacity', 'medium'] # noqa ds = main.main(args) self.assertEqual(ds.description, @@ -96,11 +91,9 @@ def test_create_dataset_template_id(self, TmplRetrieve, DatasetCreate, tpl_json = json.load(fp) tpl = DatasetTemplate.create(**tpl_json) - args = ['create-dataset', 'test-dataset', + args = ['create-dataset', 'solvebio:test_vault:/test-dataset', '--template-id', str(tpl.id), - '--capacity', 'small', - '--vault', 'test', - '--path', '/'] # noqa + '--capacity', 'small'] # noqa ds = main.main(args) self.assertEqual(ds.description, From 0e9e15a6fc143323d6fa7a93556689ac145f4c80 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Thu, 21 Dec 2017 11:36:24 -0500 Subject: [PATCH 03/20] use full paths in upload command as well --- solvebio/cli/data.py | 128 ++++++++++++----------------------- solvebio/cli/main.py | 10 +-- solvebio/resource/dataset.py | 27 +------- solvebio/resource/object.py | 38 +++++++++++ 4 files changed, 85 insertions(+), 118 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index 65634f32..fe233951 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -10,8 +10,8 @@ import solvebio -from solvebio import Object, Vault -from solvebio.client import client +from solvebio import Vault +from solvebio import Object from solvebio.utils.files import check_gzip_path from solvebio.errors import ObjectTypeError, NotFoundError @@ -81,10 +81,6 @@ def create_dataset(args): ) -def _make_full_path(s1, s2, s3): - return ':'.join([s1, s2, s3]) - - def _assert_object_type(obj, object_type): if obj.object_type != object_type: raise ObjectTypeError('{0} is a {1} but must be a folder'.format( @@ -98,27 +94,18 @@ def upload(args): Given a folder or file, upload all the folders and files contained within it, skipping ones that already exist on the remote. """ - if args.vault: - vault_name = args.vault - else: - vault_name = Vault.get_personal_vault().name - # '--path /remote/path1 local/path2' - # base_remote_path = /remote/path1 - # base_local_path = local/path2 - # local_shart = 'path2' - base_remote_path = args.path base_local_paths = args.local_path + base_remote_path, path_dict = Object._to_full_path_helper(args.path) + vault_path = path_dict['domain'] + ':' + path_dict['vault'] - user = client.get('/v1/user', {}) - domain = user['account']['domain'] + # assert vault exists + vault = Vault.get_by_full_path(vault_path) - vaults = Vault.all(name=vault_name) - - if len(vaults.data) == 0: - raise Exception('Vault not found with name "{0}"'.format(vault_name)) - else: - vault = vaults.data[0] + # If not the vault root, validate remote path exists and is a folder + if path_dict['path'] != '/': + _assert_object_type(Object.get_by_full_path( + base_remote_path), 'folder') for local_path in base_local_paths: @@ -126,55 +113,40 @@ def upload(args): local_start = os.path.basename(local_path) if os.path.isdir(local_path): - _upload_folder(domain, vault, base_remote_path, - local_path, local_start) + _upload_folder(path_dict['domain'], vault, + base_remote_path, local_path, local_start) else: - if base_remote_path != '/': - base_full_remote_path = _make_full_path( - domain, - vault.name, - base_remote_path, - ) - base_remote_object = Object.get_by_full_path( - base_full_remote_path) - _assert_object_type(base_remote_object, 'folder') - base_remote_path = base_remote_object.path - else: - base_remote_path = '/' + Object.upload_file( + local_path, path_dict['path'], path_dict['vault']) - Object.upload_file(local_path, base_remote_path, - vault.name) +def _upload_folder(domain, vault, base_remote_path, + base_local_path, local_start): -def _upload_folder(domain, vault, base_remote_path, base_local_path, - local_start): - # Create the root folder if it does not exist on the remote + # Create the upload root folder if it does not exist on the remote try: - full_root_path = _make_full_path( - domain, - vault.name, - os.path.join(base_remote_path, local_start), + upload_root_path, _ = Object._to_full_path_helper( + os.path.join(base_remote_path, local_start) ) - root_object = Object.get_by_full_path(full_root_path) - _assert_object_type(root_object, 'folder') + obj = Object.get_by_full_path(upload_root_path) + _assert_object_type(obj, 'folder') except NotFoundError: - if base_remote_path == '/': + base_remote_path, path_dict = \ + Object._to_full_path_helper(base_remote_path) + + if path_dict['path'] == '/': parent_object_id = None else: - base_remote_full_path = _make_full_path( - domain, - vault.name, - base_remote_path, - ) - obj = Object.get_by_full_path(base_remote_full_path) + obj = Object.get_by_full_path(base_remote_path) _assert_object_type(obj, 'folder') parent_object_id = obj.id + # Create base folder new_folder = Object.create( vault_id=vault.id, parent_object_id=parent_object_id, object_type='folder', - filename=local_start, + filename=local_start ) print('Notice: Folder created for {0} at {1}'.format( @@ -191,19 +163,15 @@ def _upload_folder(domain, vault, base_remote_path, base_local_path, re.sub('^' + os.path.dirname(base_local_path), '', root).lstrip('/'), # noqa d ) - full_path = _make_full_path(domain, vault.name, dirpath) try: - Object.get_by_full_path(full_path, object_type='folder') + Object.get_by_full_path(dirpath, object_type='folder') except NotFoundError: # Create the folder - if os.path.dirname(dirpath) == '/': + if os.path.dirname(dirpath.split(':')[-1]) == '/': parent_object_id = None else: - parent_full_path = _make_full_path( - domain, vault.name, - os.path.dirname(dirpath)) - + parent_full_path = os.path.dirname(dirpath) parent = Object.get_by_full_path(parent_full_path) _assert_object_type(parent, 'folder') parent_object_id = parent.id @@ -221,31 +189,23 @@ def _upload_folder(domain, vault, base_remote_path, base_local_path, # Upload the files that do not yet exist on the remote for f in files: - file_full_path = _make_full_path( - domain, - vault.name, - os.path.join( - base_remote_path, - re.sub('^' + os.path.dirname(base_local_path), - '', - root).lstrip('/'), - f, - ) + file_full_path = os.path.join( + base_remote_path, + re.sub('^' + os.path.dirname(base_local_path), + '', + root).lstrip('/'), + f, ) try: Object.get_by_full_path(file_full_path) except NotFoundError: - parent_full_path = _make_full_path( - domain, - vault.name, - os.path.dirname( - os.path.join( - base_remote_path, - re.sub('^' + os.path.dirname(base_local_path), - '', - root).lstrip('/'), - f, - ) + parent_full_path = os.path.dirname( + os.path.join( + base_remote_path, + re.sub('^' + os.path.dirname(base_local_path), + '', + root).lstrip('/'), + f, ) ) parent = Object.get_by_full_path(parent_full_path) diff --git a/solvebio/cli/main.py b/solvebio/cli/main.py index 171284ca..729068b5 100644 --- a/solvebio/cli/main.py +++ b/solvebio/cli/main.py @@ -136,17 +136,11 @@ class SolveArgumentParser(argparse.ArgumentParser): 'func': data.upload, 'help': 'Upload a file or directory to a SolveBio Vault', 'arguments': [ - { - 'flags': '--vault', - 'help': 'The name of the vault to use when ' - 'creating a new dataset (via --create-dataset), ' - 'defaults to your personal vault', - }, { 'flags': '--path', 'default': '/', - 'help': 'The path in the vault where the files and ' - 'folders should be created, defaults to "/"', + 'help': 'The full path where the files and folders should ' + 'be created, defaults to ":/"', }, { 'name': 'local_path', diff --git a/solvebio/resource/dataset.py b/solvebio/resource/dataset.py index 0697ff5a..15a25431 100644 --- a/solvebio/resource/dataset.py +++ b/solvebio/resource/dataset.py @@ -61,32 +61,8 @@ def make_full_path(cls, vault_name, path, name, **kwargs): @classmethod def get_by_full_path(cls, full_path, **kwargs): from solvebio import Object - _client = kwargs.pop('client', None) or cls._client or client - parts = full_path.split(':', 2) - - if len(parts) == 3: - account_domain, vault_name, object_path = parts - elif len(parts) == 2: - vault_name, object_path = parts - user = _client.get('/v1/user', {}) - account_domain = user['account']['domain'] - else: - raise Exception('Full path must be of the format: ' - '"vault_name:object_path" or ' - '"account_domain:vault_name:object_path"') - - if object_path[0] != '/': - raise Exception( - 'Paths are absolute and must begin with a "/"' - ) - - # Remove double slashes and strip trailing slash - object_path = re.sub('//+', '/', object_path) - if object_path != '/': - object_path = object_path.rstrip('/') - - test_path = ':'.join([account_domain, vault_name, object_path]) + test_path, _ = Object._to_full_path_helper(full_path, client=_client) obj = Object.get_by_full_path(test_path, client=_client) dataset = Dataset.retrieve(obj['dataset_id'], client=_client, **kwargs) return dataset @@ -106,7 +82,6 @@ def get_or_create_by_full_path(cls, full_path, **kwargs): pass # Dataset not found, create it step-by-step - parts = full_path.split(':', 2) if len(parts) == 3: diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index 0f14ec0b..2096a545 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -1,5 +1,6 @@ """Solvebio Object API resource""" import os +import re import base64 import binascii import mimetypes @@ -41,6 +42,43 @@ class Object(CreateableAPIResource, ('description', 'Description'), ) + @classmethod + def _to_full_path_helper(cls, full_path, **kwargs): + """ Helper method to return full path + + If no vault, use personal vault. + + """ + _client = kwargs.pop('client', None) or cls._client or client + parts = full_path.split(':', 2) + + if len(parts) == 3: + account_domain, vault_name, object_path = parts + else: + user = _client.get('/v1/user', {}) + account_domain = user['account']['domain'] + if len(parts) == 2: + vault_name, object_path = parts + else: + vault_name = 'user-{}'.format(user['id']) + object_path = full_path or '/' + + if object_path[0] != '/': + raise Exception( + 'Paths {} are absolute and must begin with a "/"' + .format(object_path) + ) + + # Remove double slashes and strip trailing slash + object_path = re.sub('//+', '/', object_path) + if object_path != '/': + object_path = object_path.rstrip('/') + + path = ':'.join([account_domain, vault_name, object_path]) + return path, dict(domain=account_domain, + vault=vault_name, + path=object_path) + @classmethod def get_by_full_path(cls, full_path, **params): params.update({'full_path': full_path}) From 8a634e4d19614bff7b72a5800c07b6f58fe44676 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Thu, 21 Dec 2017 13:12:52 -0500 Subject: [PATCH 04/20] rename vars/func, add vault path validator --- solvebio/cli/data.py | 6 ++-- solvebio/cli/main.py | 2 +- solvebio/resource/dataset.py | 4 +-- solvebio/resource/object.py | 21 ++++++------ solvebio/resource/vault.py | 64 +++++++++++++++++++----------------- 5 files changed, 50 insertions(+), 47 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index fe233951..2586e913 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -96,7 +96,7 @@ def upload(args): """ base_local_paths = args.local_path - base_remote_path, path_dict = Object._to_full_path_helper(args.path) + base_remote_path, path_dict = Object.validate_path(args.path) vault_path = path_dict['domain'] + ':' + path_dict['vault'] # assert vault exists @@ -125,14 +125,14 @@ def _upload_folder(domain, vault, base_remote_path, # Create the upload root folder if it does not exist on the remote try: - upload_root_path, _ = Object._to_full_path_helper( + upload_root_path, _ = Object.validate_path( os.path.join(base_remote_path, local_start) ) obj = Object.get_by_full_path(upload_root_path) _assert_object_type(obj, 'folder') except NotFoundError: base_remote_path, path_dict = \ - Object._to_full_path_helper(base_remote_path) + Object.validate_path(base_remote_path) if path_dict['path'] == '/': parent_object_id = None diff --git a/solvebio/cli/main.py b/solvebio/cli/main.py index 729068b5..348e5a87 100644 --- a/solvebio/cli/main.py +++ b/solvebio/cli/main.py @@ -91,7 +91,7 @@ class SolveArgumentParser(argparse.ArgumentParser): }, { 'name': 'dataset_full_path', - 'help': 'The full path to the dataset in the following format: ::' # noqa + 'help': 'The full path to the dataset in the following format: "domain:vault:/path/variants"' # noqa }, { 'name': 'file', diff --git a/solvebio/resource/dataset.py b/solvebio/resource/dataset.py index 15a25431..be16bb0b 100644 --- a/solvebio/resource/dataset.py +++ b/solvebio/resource/dataset.py @@ -1,5 +1,4 @@ import os -import re import time from ..client import client @@ -40,6 +39,7 @@ class Dataset(CreateableAPIResource, @classmethod def make_full_path(cls, vault_name, path, name, **kwargs): from solvebio import SolveError +w _client = kwargs.pop('client', None) or cls._client or client @@ -62,7 +62,7 @@ def make_full_path(cls, vault_name, path, name, **kwargs): def get_by_full_path(cls, full_path, **kwargs): from solvebio import Object _client = kwargs.pop('client', None) or cls._client or client - test_path, _ = Object._to_full_path_helper(full_path, client=_client) + test_path, _ = Object.validate_path(full_path, client=_client) obj = Object.get_by_full_path(test_path, client=_client) dataset = Dataset.retrieve(obj['dataset_id'], client=_client, **kwargs) return dataset diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index 2096a545..c8450631 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -43,14 +43,15 @@ class Object(CreateableAPIResource, ) @classmethod - def _to_full_path_helper(cls, full_path, **kwargs): - """ Helper method to return full path - - If no vault, use personal vault. + def validate_path(cls, path, **kwargs): + """ Helper method to return a full path + If no account_domain, assumes user's account domain + If no vault, uses personal vault. + If no path, uses / """ _client = kwargs.pop('client', None) or cls._client or client - parts = full_path.split(':', 2) + parts = path.split(':', 2) if len(parts) == 3: account_domain, vault_name, object_path = parts @@ -61,7 +62,7 @@ def _to_full_path_helper(cls, full_path, **kwargs): vault_name, object_path = parts else: vault_name = 'user-{}'.format(user['id']) - object_path = full_path or '/' + object_path = path or '/' if object_path[0] != '/': raise Exception( @@ -74,10 +75,10 @@ def _to_full_path_helper(cls, full_path, **kwargs): if object_path != '/': object_path = object_path.rstrip('/') - path = ':'.join([account_domain, vault_name, object_path]) - return path, dict(domain=account_domain, - vault=vault_name, - path=object_path) + full_path = ':'.join([account_domain, vault_name, object_path]) + return full_path, dict(domain=account_domain, + vault=vault_name, + path=object_path) @classmethod def get_by_full_path(cls, full_path, **params): diff --git a/solvebio/resource/vault.py b/solvebio/resource/vault.py index 062188b7..0bd01ff6 100644 --- a/solvebio/resource/vault.py +++ b/solvebio/resource/vault.py @@ -44,6 +44,27 @@ def _object_list_helper(self, **params): items = Object.all(client=self._client, **params) return items + @classmethod + def validate_path(cls, path, **kwargs): + """ Helper method to return a full path + + If no account_domain, assumes user's account domain + If no vault, uses personal vault. + """ + _client = kwargs.pop('client', None) or cls._client or client + parts = path.split(':', 1) + + if len(parts) == 2: + account_domain, vault_name = parts + else: + user = _client.get('/v1/user', {}) + account_domain = user['account']['domain'] + vault_name = '{}:{}'.format(account_domain, path) + + full_path = ':'.join([account_domain, vault_name]) + return full_path, dict(domain=account_domain, + vault=vault_name) + def files(self, **params): return self._object_list_helper(object_type='file', **params) @@ -68,15 +89,9 @@ def create_dataset(self, name, **params): if path == '/' or path is None: params['vault_parent_object_id'] = None else: - user = self._client.get('/v1/user', {}) - account_domain = user['account']['domain'] - - parent_object = Object.get_by_full_path(':'.join([ - account_domain, - self.name, - path, - ])) - + parent_object = Object.get_by_full_path( + ':'.join([self.full_path, path]) + ) params['vault_parent_object_id'] = parent_object.id params['name'] = name @@ -104,28 +119,16 @@ def search(self, query, **params): @classmethod def get_by_full_path(cls, full_path, **kwargs): - from solvebio import SolveError - _client = kwargs.pop('client', None) or cls._client or client - - parts = full_path.split(':') - - if len(parts) == 1 or len(parts) == 2: - if len(parts) == 1: - try: - user = _client.get('/v1/user', {}) - account_domain = user['account']['domain'] - except SolveError as e: - raise Exception("Error obtaining account domain: " - "{0}".format(e)) - else: - account_domain, full_path = parts - - return Vault._retrieve_helper('vault', 'name', parts[-1], - account_domain=account_domain, - name=parts[-1], - client=_client) - else: + try: + full_path, parts = cls.validate_path(full_path) + return Vault._retrieve_helper( + 'vault', 'name', parts['vault'], + account_domain=parts['domain'], + name=parts['vault'], + client=_client + ) + except: raise Exception('Full path must be of the form "vault_name" or ' '"account_domain:vault_name"') @@ -139,7 +142,6 @@ def get_or_create_by_full_path(cls, full_path, **kwargs): pass # Vault not found, create it - parts = full_path.split(':', 2) vault_name = parts[-1] From c30d1875c7a37ba8e73270e53acfcc57eaad60a1 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Thu, 21 Dec 2017 13:26:12 -0500 Subject: [PATCH 05/20] typo --- solvebio/resource/dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/solvebio/resource/dataset.py b/solvebio/resource/dataset.py index be16bb0b..390afdbb 100644 --- a/solvebio/resource/dataset.py +++ b/solvebio/resource/dataset.py @@ -39,7 +39,6 @@ class Dataset(CreateableAPIResource, @classmethod def make_full_path(cls, vault_name, path, name, **kwargs): from solvebio import SolveError -w _client = kwargs.pop('client', None) or cls._client or client From 7afec18f6d483dae11551b357eba395a75409b0d Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Thu, 21 Dec 2017 18:23:45 -0500 Subject: [PATCH 06/20] add tests --- solvebio/resource/object.py | 35 ++++++++++++++++++---------- solvebio/resource/vault.py | 15 +++++++++--- solvebio/test/test_object.py | 45 ++++++++++++++++++++++++++++++++++++ solvebio/test/test_vault.py | 29 ++++++++++++++++++++++- 4 files changed, 108 insertions(+), 16 deletions(-) create mode 100644 solvebio/test/test_object.py diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index c8450631..4820793d 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -51,27 +51,38 @@ def validate_path(cls, path, **kwargs): If no path, uses / """ _client = kwargs.pop('client', None) or cls._client or client - parts = path.split(':', 2) + # Remove double slashes and leading ':' + path = re.sub('//+', '/', path.lstrip(':')) + + parts = path.split(':', 2) if len(parts) == 3: account_domain, vault_name, object_path = parts + elif len(parts) == 2: + # if no slash assume user means root + if '/' not in parts[1]: + account_domain, vault_name = parts + object_path = '/' + else: + # if second part begins with slash, assume missing domain + if parts[1][0] == '/': + account_domain = \ + _client.get('/v1/user', {})['account']['domain'] + vault_name, object_path = parts + # else assumes missing ":" between vault and path + else: + account_domain = parts[0] + vault_name, object_path = parts[1].split('/', 1) else: user = _client.get('/v1/user', {}) account_domain = user['account']['domain'] - if len(parts) == 2: - vault_name, object_path = parts - else: - vault_name = 'user-{}'.format(user['id']) - object_path = path or '/' + vault_name = 'user-{}'.format(user['id']) + object_path = path or '/' if object_path[0] != '/': - raise Exception( - 'Paths {} are absolute and must begin with a "/"' - .format(object_path) - ) + object_path = '/' + object_path - # Remove double slashes and strip trailing slash - object_path = re.sub('//+', '/', object_path) + # Strip trailing slash if object_path != '/': object_path = object_path.rstrip('/') diff --git a/solvebio/resource/vault.py b/solvebio/resource/vault.py index 0bd01ff6..a4b016a5 100644 --- a/solvebio/resource/vault.py +++ b/solvebio/resource/vault.py @@ -52,14 +52,23 @@ def validate_path(cls, path, **kwargs): If no vault, uses personal vault. """ _client = kwargs.pop('client', None) or cls._client or client - parts = path.split(':', 1) + parts = path.split(':') - if len(parts) == 2: + if not path or len(parts) > 2: + raise Exception( + 'Vault path "{}" invalid. Must be of the format: ' + '"account_domain:vault_name".'.format(path) + ) + elif len(parts) == 2: account_domain, vault_name = parts else: user = _client.get('/v1/user', {}) account_domain = user['account']['domain'] - vault_name = '{}:{}'.format(account_domain, path) + vault_name = path + + # Strip any paths from the vault_name + if '/' in vault_name: + vault_name = vault_name.split('/', 1)[0] full_path = ':'.join([account_domain, vault_name]) return full_path, dict(domain=account_domain, diff --git a/solvebio/test/test_object.py b/solvebio/test/test_object.py new file mode 100644 index 00000000..7c9f9328 --- /dev/null +++ b/solvebio/test/test_object.py @@ -0,0 +1,45 @@ +from __future__ import absolute_import +# from solvebio.resource import Vault + +from .helper import SolveBioTestCase + + +class ObjectTests(SolveBioTestCase): + + def test_object_paths(self): + vaults = self.client.Vault.all() + for vault in vaults: + for file_ in list(vault.ls().solve_objects())[:5]: + o_path, _ = self.client.Object.validate_path(file_.full_path) + self.assertEqual(o_path, file_.full_path) + + def test_object_path_cases(self): + + user = self.client.User.retrieve() + domain = user.account.domain + user_vault = '{}:user-{}'.format(domain, user.id) + test_cases = [ + ['{}:myVault'.format(domain), '{}:myVault:/'.format(domain)], + ['acme:myVault', 'acme:myVault:/'], + ['myVault', '{}:/myVault'.format(user_vault)], + ['acme:myVault:/uploads_folder', 'acme:myVault:/uploads_folder'], + ['acme:myVault:/uploads_folder', 'acme:myVault:/uploads_folder'], + ['acme:myVault/uploads_folder', 'acme:myVault:/uploads_folder'], + ['myVault:/uploads_folder', '{}:myVault:/uploads_folder'.format(domain)], # noqa + ['/uploads_folder', '{}:/uploads_folder'.format(user_vault)], + [':/uploads_folder', '{}:/uploads_folder'.format(user_vault)], + ['myVault/uploads_folder', '{}:/myVault/uploads_folder'.format(user_vault)], # noqa + ] + for case, expected in test_cases: + print case + p, _ = self.client.Object.validate_path(case) + self.assertEqual(p, expected) + + error_test_cases = [ + '', + 'myDomain:myVault:/the/heack', + 'oops:myDomain:myVault', + ] + for case in error_test_cases: + with self.assertRaises(Exception): + v, v_paths = self.client.Vault.validate_path(case) diff --git a/solvebio/test/test_vault.py b/solvebio/test/test_vault.py index b0955b7b..63ee9649 100644 --- a/solvebio/test/test_vault.py +++ b/solvebio/test/test_vault.py @@ -1,5 +1,4 @@ from __future__ import absolute_import -# from solvebio.resource import Vault from .helper import SolveBioTestCase @@ -27,3 +26,31 @@ def test_vaults(self): for f in check_fields: self.assertTrue(f in vault, '{0} field is present'.format(f)) + + def test_vault_paths(self): + vaults = self.client.Vault.all() + for vault in vaults: + v, v_paths = self.client.Vault.validate_path(vault.full_path) + self.assertEqual(v, vault.full_path) + + domain = self.client.User.retrieve().account.domain + test_cases = [ + ['myVault', '{}:myVault'.format(domain)], + ['{}:myVault'.format(domain), '{}:myVault'.format(domain)], + ['acme:myVault', 'acme:myVault'], + # this assumes user f-ed and forgot the semi-colon for path + ['acme:myVault/uploads_folder', 'acme:myVault'], + ['myVault/uploads_folder', '{}:myVault'.format(domain)], + ] + for case, expected in test_cases: + v, v_paths = self.client.Vault.validate_path(case) + self.assertEqual(v, expected) + + error_test_cases = [ + '', + 'myDomain:myVault:/the/heack', + 'oops:myDomain:myVault', + ] + for case in error_test_cases: + with self.assertRaises(Exception): + v, v_paths = self.client.Vault.validate_path(case) From 60bf5a7ef1c9671a72aeb72db5ee3829604a6ba6 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Thu, 21 Dec 2017 19:03:52 -0500 Subject: [PATCH 07/20] update readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ff903ecb..008d1d53 100644 --- a/README.md +++ b/README.md @@ -234,11 +234,11 @@ remained unchanged. The `objects` property of a resource has been renamed `solve_objects`. 6. The `import` and `create-dataset` command-line utilities now require -`--vault` and `--path` arguments. The `dataset` argument (`test-dataset` +`--dataset_full_path` arguments. The `dataset` argument (`test-dataset` below) no longer can contain slashes. ``` -create-dataset --capacity=small --vault=test --path=/ test-dataset +create-dataset --capacity=small --dataset_ful_path=acme:test:/examples test-dataset ``` 7. Removal of DatasetCommit approval. The `auto_approve`, `is_approved` and From 87055b46644fa3843e87e75a045e0c8786a318ff Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Thu, 21 Dec 2017 19:17:21 -0500 Subject: [PATCH 08/20] python3 syntax --- solvebio/resource/object.py | 2 +- solvebio/test/test_object.py | 14 +++++++------- solvebio/test/test_vault.py | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index 4820793d..b333ace0 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -76,7 +76,7 @@ def validate_path(cls, path, **kwargs): else: user = _client.get('/v1/user', {}) account_domain = user['account']['domain'] - vault_name = 'user-{}'.format(user['id']) + vault_name = 'user-{0}'.format(user['id']) object_path = path or '/' if object_path[0] != '/': diff --git a/solvebio/test/test_object.py b/solvebio/test/test_object.py index 7c9f9328..0597b937 100644 --- a/solvebio/test/test_object.py +++ b/solvebio/test/test_object.py @@ -17,18 +17,18 @@ def test_object_path_cases(self): user = self.client.User.retrieve() domain = user.account.domain - user_vault = '{}:user-{}'.format(domain, user.id) + user_vault = '{0}:user-{1}'.format(domain, user.id) test_cases = [ - ['{}:myVault'.format(domain), '{}:myVault:/'.format(domain)], + ['{0}:myVault'.format(domain), '{0}:myVault:/'.format(domain)], ['acme:myVault', 'acme:myVault:/'], - ['myVault', '{}:/myVault'.format(user_vault)], + ['myVault', '{0}:/myVault'.format(user_vault)], ['acme:myVault:/uploads_folder', 'acme:myVault:/uploads_folder'], ['acme:myVault:/uploads_folder', 'acme:myVault:/uploads_folder'], ['acme:myVault/uploads_folder', 'acme:myVault:/uploads_folder'], - ['myVault:/uploads_folder', '{}:myVault:/uploads_folder'.format(domain)], # noqa - ['/uploads_folder', '{}:/uploads_folder'.format(user_vault)], - [':/uploads_folder', '{}:/uploads_folder'.format(user_vault)], - ['myVault/uploads_folder', '{}:/myVault/uploads_folder'.format(user_vault)], # noqa + ['myVault:/uploads_folder', '{0}:myVault:/uploads_folder'.format(domain)], # noqa + ['/uploads_folder', '{0}:/uploads_folder'.format(user_vault)], + [':/uploads_folder', '{0}:/uploads_folder'.format(user_vault)], + ['myVault/uploads_folder', '{0}:/myVault/uploads_folder'.format(user_vault)], # noqa ] for case, expected in test_cases: print case diff --git a/solvebio/test/test_vault.py b/solvebio/test/test_vault.py index 63ee9649..d5c0c6c3 100644 --- a/solvebio/test/test_vault.py +++ b/solvebio/test/test_vault.py @@ -35,12 +35,12 @@ def test_vault_paths(self): domain = self.client.User.retrieve().account.domain test_cases = [ - ['myVault', '{}:myVault'.format(domain)], - ['{}:myVault'.format(domain), '{}:myVault'.format(domain)], + ['myVault', '{0}:myVault'.format(domain)], + ['{0}:myVault'.format(domain), '{0}:myVault'.format(domain)], ['acme:myVault', 'acme:myVault'], # this assumes user f-ed and forgot the semi-colon for path ['acme:myVault/uploads_folder', 'acme:myVault'], - ['myVault/uploads_folder', '{}:myVault'.format(domain)], + ['myVault/uploads_folder', '{0}:myVault'.format(domain)], ] for case, expected in test_cases: v, v_paths = self.client.Vault.validate_path(case) From 6e5a2ebbac96febe02dbe519553e64c5448671a3 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Thu, 21 Dec 2017 19:28:31 -0500 Subject: [PATCH 09/20] remove print --- solvebio/test/test_object.py | 1 - 1 file changed, 1 deletion(-) diff --git a/solvebio/test/test_object.py b/solvebio/test/test_object.py index 0597b937..4bd1c8b3 100644 --- a/solvebio/test/test_object.py +++ b/solvebio/test/test_object.py @@ -31,7 +31,6 @@ def test_object_path_cases(self): ['myVault/uploads_folder', '{0}:/myVault/uploads_folder'.format(user_vault)], # noqa ] for case, expected in test_cases: - print case p, _ = self.client.Object.validate_path(case) self.assertEqual(p, expected) From e36c0f34d3b87c5ca1a724bf662c376cb92d888d Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Thu, 28 Dec 2017 16:07:44 -0500 Subject: [PATCH 10/20] standardize path variable to full-path/full_path and add deprecations --- README.md | 4 +-- solvebio/cli/data.py | 68 +++++++++++++++++++++++++++++++++++++------- solvebio/cli/main.py | 40 +++++++++++++++++++++++--- 3 files changed, 95 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 008d1d53..72e674ab 100644 --- a/README.md +++ b/README.md @@ -234,11 +234,11 @@ remained unchanged. The `objects` property of a resource has been renamed `solve_objects`. 6. The `import` and `create-dataset` command-line utilities now require -`--dataset_full_path` arguments. The `dataset` argument (`test-dataset` +`--full-path` arguments. The `dataset` argument (`test-dataset` below) no longer can contain slashes. ``` -create-dataset --capacity=small --dataset_ful_path=acme:test:/examples test-dataset +create-dataset --capacity=small --full-path=acme:test:/examples test-dataset ``` 7. Removal of DatasetCommit approval. The `auto_approve`, `is_approved` and diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index 2586e913..e31ae539 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -20,12 +20,37 @@ def create_dataset(args): """ Attempt to create a new dataset given the following params: - * dataset (full name) + * full_path * template_id * template_file * capacity + * create_vault """ + + # + # Deprecations + # + if args.vault: + raise Exception( + '[Deprecated] Pass vault path as part of "full_path"' + ) + + if args.path: + raise Exception( + '[Deprecated] Pass path as part of "full_path"' + ) + + if args.dataset_name: + raise Exception( + '[Deprecated] Pass dataset_name as part of "full_path"' + ) + + if args.genome_build: + raise Exception( + '[Deprecated] The genome_build parameter has been deprecated.' + ) + # Accept a template_id or a template_file if args.template_id: # Validate the template ID @@ -57,7 +82,7 @@ def create_dataset(args): print("A new dataset template was created with id: {0}".format(tpl.id)) else: print("Creating a new dataset {0} without a template." - .format(args.dataset_full_path)) + .format(args.full_path)) tpl = None fields = [] entity_type = None @@ -65,14 +90,14 @@ def create_dataset(args): if tpl: print("Creating new dataset {0} using the template '{1}'." - .format(args.dataset_full_path, tpl.name)) + .format(args.full_path, tpl.name)) fields = tpl.fields entity_type = tpl.entity_type # include template used to create description = 'Created with dataset template: {0}'.format(str(tpl.id)) return solvebio.Dataset.get_or_create_by_full_path( - args.dataset_full_path, + args.full_path, capacity=args.capacity, entity_type=entity_type, fields=fields, @@ -94,9 +119,13 @@ def upload(args): Given a folder or file, upload all the folders and files contained within it, skipping ones that already exist on the remote. """ + if args.path: + raise Exception( + '[Deprecated] Use --full-path instead' + ) base_local_paths = args.local_path - base_remote_path, path_dict = Object.validate_path(args.path) + base_remote_path, path_dict = Object.validate_path(args.full_path) vault_path = path_dict['domain'] + ':' + path_dict['vault'] # assert vault exists @@ -222,14 +251,31 @@ def import_file(args): * create_dataset * template_id - * vault_name - * path - * follow (default: False) - * dataset + * full_path + * commit_mode * capacity * file (list) + * follow (default: False) """ + # + # Deprecations + # + if args.vault: + raise Exception( + '[Deprecated] Pass vault path as part of "full_path"' + ) + + if args.path: + raise Exception( + '[Deprecated] Pass path as part of "full_path"' + ) + + if args.dataset_name: + raise Exception( + '[Deprecated] Pass dataset_name as part of "full_path"' + ) + if not solvebio.api_key: solvebio.login() @@ -238,12 +284,12 @@ def import_file(args): dataset = create_dataset(args) else: try: - dataset = solvebio.Dataset.get_by_full_path(args.dataset_full_path) + dataset = solvebio.Dataset.get_by_full_path(args.full_path) except solvebio.SolveError as e: if e.status_code != 404: raise e - print("Dataset not found: {0}".format(args.dataset_full_path)) + print("Dataset not found: {0}".format(args.full_path)) print("Tip: use the --create-dataset flag " "to create one from a template") sys.exit(1) diff --git a/solvebio/cli/main.py b/solvebio/cli/main.py index 348e5a87..14a663bc 100644 --- a/solvebio/cli/main.py +++ b/solvebio/cli/main.py @@ -90,13 +90,25 @@ class SolveArgumentParser(argparse.ArgumentParser): 'Options are "append" (default) or "overwrite".' }, { - 'name': 'dataset_full_path', + 'name': 'full_path', 'help': 'The full path to the dataset in the following format: "domain:vault:/path/variants"' # noqa }, { 'name': 'file', 'help': 'One or more local files to import', 'nargs': '+' + }, + { + 'flags': '--vault', + 'help': '[Deprecated] Pass vault path as part of "full_path"' # noqa + }, + { + 'flags': '--path', + 'help': '[Deprecated] Pass path as part of "full_path"' # noqa + }, + { + 'name': 'dataset_name', + 'help': '[Deprecated] Pass dataset_name as part of "full_path"' # noqa } ] }, @@ -127,9 +139,25 @@ class SolveArgumentParser(argparse.ArgumentParser): 'medium (<500M), large (>=500M)' }, { - 'name': 'dataset_full_path', + 'name': 'full_path', 'help': 'The full path to the dataset in the following format: ::' # noqa - } + }, + { + 'flags': '--vault', + 'help': '[Deprecated] Pass vault path as part of "full_path"' # noqa + }, + { + 'flags': '--path', + 'help': '[Deprecated] Pass path as part of "full_path"' # noqa + }, + { + 'name': 'dataset_name', + 'help': '[Deprecated] Pass dataset_name as part of "full_path"' # noqa + }, + { + 'flags': '--genome-build', + 'help': '[Deprecated] This parameter is no longer in use."' # noqa + }, ] }, 'upload': { @@ -137,7 +165,7 @@ class SolveArgumentParser(argparse.ArgumentParser): 'help': 'Upload a file or directory to a SolveBio Vault', 'arguments': [ { - 'flags': '--path', + 'flags': '--full-path', 'default': '/', 'help': 'The full path where the files and folders should ' 'be created, defaults to ":/"', @@ -147,6 +175,10 @@ class SolveArgumentParser(argparse.ArgumentParser): 'help': 'The path to the local file or directory ' 'to upload', 'nargs': '+' + }, + { + 'flags': '--path', + 'help': '[Deprecated] Use --full-path' } ] }, From 37b2134aefda8f291ca71c53534567c6da224bfb Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Thu, 28 Dec 2017 16:13:09 -0500 Subject: [PATCH 11/20] update messages --- solvebio/cli/data.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index e31ae539..5c670bb4 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -33,22 +33,22 @@ def create_dataset(args): # if args.vault: raise Exception( - '[Deprecated] Pass vault path as part of "full_path"' + '[Deprecated] --vault has been deprecated. Pass vault path as part of "full_path"' # noqa ) if args.path: raise Exception( - '[Deprecated] Pass path as part of "full_path"' + '[Deprecated] --path has been deprecated. Pass path as part of "full_path"' # noqa ) if args.dataset_name: raise Exception( - '[Deprecated] Pass dataset_name as part of "full_path"' + '[Deprecated] "dataset_name" has been deprecated. Pass as part of "full_path"' # noqa ) if args.genome_build: raise Exception( - '[Deprecated] The genome_build parameter has been deprecated.' + '[Deprecated] The --genome_build parameter has been deprecated.' ) # Accept a template_id or a template_file @@ -121,7 +121,7 @@ def upload(args): """ if args.path: raise Exception( - '[Deprecated] Use --full-path instead' + '[Deprecated] Flag --path has been deprecated. Use --full-path instead' # noqa ) base_local_paths = args.local_path @@ -263,17 +263,17 @@ def import_file(args): # if args.vault: raise Exception( - '[Deprecated] Pass vault path as part of "full_path"' + '[Deprecated] --vault has been deprecated. Pass vault path as part of "full_path"' # noqa ) if args.path: raise Exception( - '[Deprecated] Pass path as part of "full_path"' + '[Deprecated] --path has been deprecated. Pass path as part of "full_path"' # noqa ) if args.dataset_name: raise Exception( - '[Deprecated] Pass dataset_name as part of "full_path"' + '[Deprecated] "dataset_name" has been deprecated. Pass as part of "full_path"' # noqa ) if not solvebio.api_key: From 2151143e492719a633fc971824fa7a54401138e0 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Thu, 28 Dec 2017 16:50:54 -0500 Subject: [PATCH 12/20] update cases --- solvebio/resource/object.py | 27 ++++++++++++++++++++------- solvebio/test/test_object.py | 10 ++++------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index b333ace0..84e876bc 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -50,6 +50,8 @@ def validate_path(cls, path, **kwargs): If no vault, uses personal vault. If no path, uses / """ + from solvebio.resource.vault import Vault + _client = kwargs.pop('client', None) or cls._client or client # Remove double slashes and leading ':' @@ -69,15 +71,26 @@ def validate_path(cls, path, **kwargs): account_domain = \ _client.get('/v1/user', {})['account']['domain'] vault_name, object_path = parts - # else assumes missing ":" between vault and path else: - account_domain = parts[0] - vault_name, object_path = parts[1].split('/', 1) + raise Exception('Full path must be of the format: ' + '"vault_name:object_path" or ' + '"account_domain:vault_name:object_path"') + # TODO maybe no good + # assumes missing ":" between vault and path + # mydomain:myvault/here/is/path + # myvault:here/is/path + # account_domain = parts[0] + # vault_name, object_path = parts[1].split('/', 1) else: - user = _client.get('/v1/user', {}) - account_domain = user['account']['domain'] - vault_name = 'user-{0}'.format(user['id']) - object_path = path or '/' + # if slash assume user means private vault + if '/' in parts[0]: + vault = Vault.get_personal_vault(client=_client) + account_domain, vault_name = vault.full_path.split(':') + object_path = parts[0] + else: + raise Exception('Full path must be of the format: ' + '"vault_name:object_path" or ' + '"account_domain:vault_name:object_path"') if object_path[0] != '/': object_path = '/' + object_path diff --git a/solvebio/test/test_object.py b/solvebio/test/test_object.py index 4bd1c8b3..2595df87 100644 --- a/solvebio/test/test_object.py +++ b/solvebio/test/test_object.py @@ -21,14 +21,12 @@ def test_object_path_cases(self): test_cases = [ ['{0}:myVault'.format(domain), '{0}:myVault:/'.format(domain)], ['acme:myVault', 'acme:myVault:/'], - ['myVault', '{0}:/myVault'.format(user_vault)], ['acme:myVault:/uploads_folder', 'acme:myVault:/uploads_folder'], - ['acme:myVault:/uploads_folder', 'acme:myVault:/uploads_folder'], - ['acme:myVault/uploads_folder', 'acme:myVault:/uploads_folder'], ['myVault:/uploads_folder', '{0}:myVault:/uploads_folder'.format(domain)], # noqa ['/uploads_folder', '{0}:/uploads_folder'.format(user_vault)], [':/uploads_folder', '{0}:/uploads_folder'.format(user_vault)], ['myVault/uploads_folder', '{0}:/myVault/uploads_folder'.format(user_vault)], # noqa + ['oops:myDomain:myVault', 'oops:myDomain:/myVault'], ] for case, expected in test_cases: p, _ = self.client.Object.validate_path(case) @@ -36,9 +34,9 @@ def test_object_path_cases(self): error_test_cases = [ '', - 'myDomain:myVault:/the/heack', - 'oops:myDomain:myVault', + 'myVault', + 'acme:myVault/uploads_folder' ] for case in error_test_cases: with self.assertRaises(Exception): - v, v_paths = self.client.Vault.validate_path(case) + v, v_paths = self.client.Object.validate_path(case) From 549b1aa7d3ddeae0c7aef0ba018631fc5c506223 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 29 Dec 2017 10:13:47 -0500 Subject: [PATCH 13/20] remove deprecation --- solvebio/cli/data.py | 5 ----- solvebio/cli/main.py | 4 ---- 2 files changed, 9 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index 5c670bb4..11301868 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -41,11 +41,6 @@ def create_dataset(args): '[Deprecated] --path has been deprecated. Pass path as part of "full_path"' # noqa ) - if args.dataset_name: - raise Exception( - '[Deprecated] "dataset_name" has been deprecated. Pass as part of "full_path"' # noqa - ) - if args.genome_build: raise Exception( '[Deprecated] The --genome_build parameter has been deprecated.' diff --git a/solvebio/cli/main.py b/solvebio/cli/main.py index 14a663bc..f5846d3a 100644 --- a/solvebio/cli/main.py +++ b/solvebio/cli/main.py @@ -150,10 +150,6 @@ class SolveArgumentParser(argparse.ArgumentParser): 'flags': '--path', 'help': '[Deprecated] Pass path as part of "full_path"' # noqa }, - { - 'name': 'dataset_name', - 'help': '[Deprecated] Pass dataset_name as part of "full_path"' # noqa - }, { 'flags': '--genome-build', 'help': '[Deprecated] This parameter is no longer in use."' # noqa From 5fb70d5c6f5b95d4a62e47aae7591f5c7c112e64 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 29 Dec 2017 12:11:04 -0500 Subject: [PATCH 14/20] remove except statement that masked already handled error --- solvebio/resource/apiresource.py | 1 - solvebio/resource/vault.py | 18 +++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/solvebio/resource/apiresource.py b/solvebio/resource/apiresource.py index c6496544..a1a66b65 100644 --- a/solvebio/resource/apiresource.py +++ b/solvebio/resource/apiresource.py @@ -245,7 +245,6 @@ def _retrieve_helper(cls, model_name, field_name, error_value, **params): results = convert_to_solve_object(response, client=_client) objects = results.data allow_multiple = params.pop('allow_multiple', None) - if len(objects) > 1: if allow_multiple: return objects diff --git a/solvebio/resource/vault.py b/solvebio/resource/vault.py index a4b016a5..3de835d8 100644 --- a/solvebio/resource/vault.py +++ b/solvebio/resource/vault.py @@ -129,17 +129,13 @@ def search(self, query, **params): @classmethod def get_by_full_path(cls, full_path, **kwargs): _client = kwargs.pop('client', None) or cls._client or client - try: - full_path, parts = cls.validate_path(full_path) - return Vault._retrieve_helper( - 'vault', 'name', parts['vault'], - account_domain=parts['domain'], - name=parts['vault'], - client=_client - ) - except: - raise Exception('Full path must be of the form "vault_name" or ' - '"account_domain:vault_name"') + full_path, parts = cls.validate_path(full_path) + return Vault._retrieve_helper( + 'vault', 'name', full_path, + account_domain=parts['domain'], + name=parts['vault'], + client=_client + ) @classmethod def get_or_create_by_full_path(cls, full_path, **kwargs): From 9d43da2cd706cc4df164831fce7de74f656ee0a6 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 29 Dec 2017 12:20:53 -0500 Subject: [PATCH 15/20] update path handling for creating datasets and errors --- solvebio/resource/dataset.py | 23 +++++++++++------------ solvebio/resource/object.py | 8 ++++---- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/solvebio/resource/dataset.py b/solvebio/resource/dataset.py index 390afdbb..2720223f 100644 --- a/solvebio/resource/dataset.py +++ b/solvebio/resource/dataset.py @@ -75,34 +75,33 @@ def get_or_create_by_full_path(cls, full_path, **kwargs): create_vault = kwargs.pop('create_vault', False) create_folders = kwargs.pop('create_folders', True) + # Validate path + try: return Dataset.get_by_full_path(full_path, client=_client) except NotFoundError: pass # Dataset not found, create it step-by-step - parts = full_path.split(':', 2) - - if len(parts) == 3: - account_domain, vault_name, object_path = parts - elif len(parts) == 2: - vault_name, object_path = parts - user = _client.get('/v1/user', {}) - account_domain = user['account']['domain'] + full_path, parts = Object.validate_path(full_path, client=_client) if create_vault: vault = Vault.get_or_create_by_full_path( - '{0}:{1}'.format(account_domain, vault_name), + '{0}:{1}'.format(parts['domain'], parts['vault']), client=_client) else: - vaults = Vault.all(account_domain=account_domain, name=vault_name, + vaults = Vault.all(account_domain=parts['domain'], + name=parts['vault'], client=_client) if len(vaults.solve_objects()) == 0: - raise Exception('Vault does not exist with name {0}'.format( - vault_name)) + raise Exception( + 'Vault does not exist with name {0}:{1}'.format( + parts['domain'], parts['vault']) + ) vault = vaults.solve_objects()[0] # Create the folders to hold the dataset if they do not already exist. + object_path = parts['path'] curr_path = os.path.dirname(object_path) folders_to_create = [] new_folders = [] diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index 84e876bc..4f57463c 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -73,8 +73,8 @@ def validate_path(cls, path, **kwargs): vault_name, object_path = parts else: raise Exception('Full path must be of the format: ' - '"vault_name:object_path" or ' - '"account_domain:vault_name:object_path"') + '"vault_name:/object_path" or ' + '"account_domain:vault_name:/object_path"') # TODO maybe no good # assumes missing ":" between vault and path # mydomain:myvault/here/is/path @@ -89,8 +89,8 @@ def validate_path(cls, path, **kwargs): object_path = parts[0] else: raise Exception('Full path must be of the format: ' - '"vault_name:object_path" or ' - '"account_domain:vault_name:object_path"') + '"vault_name:/object_path" or ' + '"account_domain:vault_name:/object_path"') if object_path[0] != '/': object_path = '/' + object_path From 4e0b745e54f19725aa25c915ca7cba0941017fdd Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 29 Dec 2017 12:26:18 -0500 Subject: [PATCH 16/20] including these deprecations is very confusing --- solvebio/cli/data.py | 15 --------------- solvebio/cli/main.py | 8 -------- 2 files changed, 23 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index 11301868..0f7f8dba 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -36,11 +36,6 @@ def create_dataset(args): '[Deprecated] --vault has been deprecated. Pass vault path as part of "full_path"' # noqa ) - if args.path: - raise Exception( - '[Deprecated] --path has been deprecated. Pass path as part of "full_path"' # noqa - ) - if args.genome_build: raise Exception( '[Deprecated] The --genome_build parameter has been deprecated.' @@ -261,16 +256,6 @@ def import_file(args): '[Deprecated] --vault has been deprecated. Pass vault path as part of "full_path"' # noqa ) - if args.path: - raise Exception( - '[Deprecated] --path has been deprecated. Pass path as part of "full_path"' # noqa - ) - - if args.dataset_name: - raise Exception( - '[Deprecated] "dataset_name" has been deprecated. Pass as part of "full_path"' # noqa - ) - if not solvebio.api_key: solvebio.login() diff --git a/solvebio/cli/main.py b/solvebio/cli/main.py index f5846d3a..2805c575 100644 --- a/solvebio/cli/main.py +++ b/solvebio/cli/main.py @@ -101,14 +101,6 @@ class SolveArgumentParser(argparse.ArgumentParser): { 'flags': '--vault', 'help': '[Deprecated] Pass vault path as part of "full_path"' # noqa - }, - { - 'flags': '--path', - 'help': '[Deprecated] Pass path as part of "full_path"' # noqa - }, - { - 'name': 'dataset_name', - 'help': '[Deprecated] Pass dataset_name as part of "full_path"' # noqa } ] }, From d8328d10c2db240b8fb0ed5e81525bc8582fff8e Mon Sep 17 00:00:00 2001 From: David Caplan Date: Wed, 3 Jan 2018 16:36:55 -0500 Subject: [PATCH 17/20] add back --vault and --path, but ensure --full-path overrides --- solvebio/cli/data.py | 239 ++++++++++++++++++++++--------------------- solvebio/cli/main.py | 61 +++++++---- 2 files changed, 161 insertions(+), 139 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index 0f7f8dba..55ec363e 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -16,84 +16,25 @@ from solvebio.errors import ObjectTypeError, NotFoundError -def create_dataset(args): - """ - Attempt to create a new dataset given the following params: - - * full_path - * template_id - * template_file - * capacity - * create_vault - +def _full_path_from_args(args): """ + Handles the following args: - # - # Deprecations - # - if args.vault: - raise Exception( - '[Deprecated] --vault has been deprecated. Pass vault path as part of "full_path"' # noqa - ) - - if args.genome_build: - raise Exception( - '[Deprecated] The --genome_build parameter has been deprecated.' - ) - - # Accept a template_id or a template_file - if args.template_id: - # Validate the template ID - try: - tpl = solvebio.DatasetTemplate.retrieve(args.template_id) - except solvebio.SolveError as e: - if e.status_code != 404: - raise e - print("No template with ID {0} found!" - .format(args.template_id)) - sys.exit(1) - elif args.template_file: - mode = 'r' - fopen = open - if check_gzip_path(args.template_file): - mode = 'rb' - fopen = gzip.open + * full-path + * vault + * path - # Validate the template file - with fopen(args.template_file, mode) as fp: - try: - tpl_json = json.load(fp) - except: - print('Template file {0} could not be loaded. Please ' - 'pass valid JSON'.format(args.template_file)) - sys.exit(1) + Always uses "full_path" if provided (overrides vault and path). + Otherwise, attempts to use "vault" and "path". - tpl = solvebio.DatasetTemplate.create(**tpl_json) - print("A new dataset template was created with id: {0}".format(tpl.id)) - else: - print("Creating a new dataset {0} without a template." - .format(args.full_path)) - tpl = None - fields = [] - entity_type = None - description = None + If no paths are specified, defaults to personal-vault:/ + """ + if args.full_path: + return args.full_path - if tpl: - print("Creating new dataset {0} using the template '{1}'." - .format(args.full_path, tpl.name)) - fields = tpl.fields - entity_type = tpl.entity_type - # include template used to create - description = 'Created with dataset template: {0}'.format(str(tpl.id)) - - return solvebio.Dataset.get_or_create_by_full_path( - args.full_path, - capacity=args.capacity, - entity_type=entity_type, - fields=fields, - description=description, - create_vault=args.create_vault, - ) + return '{0}:{1}'.format( + args.vault or Vault.get_personal_vault().name, + args.path or '/') def _assert_object_type(obj, object_type): @@ -104,41 +45,6 @@ def _assert_object_type(obj, object_type): )) -def upload(args): - """ - Given a folder or file, upload all the folders and files contained - within it, skipping ones that already exist on the remote. - """ - if args.path: - raise Exception( - '[Deprecated] Flag --path has been deprecated. Use --full-path instead' # noqa - ) - - base_local_paths = args.local_path - base_remote_path, path_dict = Object.validate_path(args.full_path) - vault_path = path_dict['domain'] + ':' + path_dict['vault'] - - # assert vault exists - vault = Vault.get_by_full_path(vault_path) - - # If not the vault root, validate remote path exists and is a folder - if path_dict['path'] != '/': - _assert_object_type(Object.get_by_full_path( - base_remote_path), 'folder') - - for local_path in base_local_paths: - - local_path = local_path.rstrip('/') - local_start = os.path.basename(local_path) - - if os.path.isdir(local_path): - _upload_folder(path_dict['domain'], vault, - base_remote_path, local_path, local_start) - else: - Object.upload_file( - local_path, path_dict['path'], path_dict['vault']) - - def _upload_folder(domain, vault, base_remote_path, base_local_path, local_start): @@ -233,6 +139,106 @@ def _upload_folder(domain, vault, base_remote_path, vault.name) +def create_dataset(args): + """ + Attempt to create a new dataset given the following params: + + * full_path (or vault & path) + * template_id + * template_file + * capacity + * create_vault + + NOTE: genome_build has been deprecated and is no longer used. + + """ + full_path = _full_path_from_args(args) + + # Accept a template_id or a template_file + if args.template_id: + # Validate the template ID + try: + tpl = solvebio.DatasetTemplate.retrieve(args.template_id) + except solvebio.SolveError as e: + if e.status_code != 404: + raise e + print("No template with ID {0} found!" + .format(args.template_id)) + sys.exit(1) + elif args.template_file: + mode = 'r' + fopen = open + if check_gzip_path(args.template_file): + mode = 'rb' + fopen = gzip.open + + # Validate the template file + with fopen(args.template_file, mode) as fp: + try: + tpl_json = json.load(fp) + except: + print('Template file {0} could not be loaded. Please ' + 'pass valid JSON'.format(args.template_file)) + sys.exit(1) + + tpl = solvebio.DatasetTemplate.create(**tpl_json) + print("A new dataset template was created with id: {0}".format(tpl.id)) + else: + print("Creating a new dataset {0} without a template." + .format(full_path)) + tpl = None + fields = [] + entity_type = None + description = None + + if tpl: + print("Creating new dataset {0} using the template '{1}'." + .format(full_path, tpl.name)) + fields = tpl.fields + entity_type = tpl.entity_type + # include template used to create + description = 'Created with dataset template: {0}'.format(str(tpl.id)) + + return solvebio.Dataset.get_or_create_by_full_path( + full_path, + capacity=args.capacity, + entity_type=entity_type, + fields=fields, + description=description, + create_vault=args.create_vault, + ) + + +def upload(args): + """ + Given a folder or file, upload all the folders and files contained + within it, skipping ones that already exist on the remote. + """ + + full_path = _full_path_from_args(args) + base_remote_path, path_dict = Object.validate_path(full_path) + vault_path = path_dict['domain'] + ':' + path_dict['vault'] + + # Assert the vault exists and is accessible + vault = Vault.get_by_full_path(vault_path) + + # If not the vault root, validate remote path exists and is a folder + if path_dict['path'] != '/': + _assert_object_type(Object.get_by_full_path( + base_remote_path), 'folder') + + for local_path in args.local_path: + local_path = local_path.rstrip('/') + local_start = os.path.basename(local_path) + + if os.path.isdir(local_path): + _upload_folder(path_dict['domain'], vault, + base_remote_path, local_path, local_start) + else: + Object.upload_file( + local_path, path_dict['path'], path_dict['vault']) + + def import_file(args): """ Given a dataset and a local path, upload and import the file(s). @@ -241,35 +247,30 @@ def import_file(args): * create_dataset * template_id - * full_path + * full_path (or vault & path) * commit_mode * capacity * file (list) * follow (default: False) """ - # - # Deprecations - # - if args.vault: - raise Exception( - '[Deprecated] --vault has been deprecated. Pass vault path as part of "full_path"' # noqa - ) - + # FIXME: Does this need to be here? What about other commands? if not solvebio.api_key: solvebio.login() + full_path = _full_path_from_args(args) + # Ensure the dataset exists. Create if necessary. if args.create_dataset: dataset = create_dataset(args) else: try: - dataset = solvebio.Dataset.get_by_full_path(args.full_path) + dataset = solvebio.Dataset.get_by_full_path(full_path) except solvebio.SolveError as e: if e.status_code != 404: raise e - print("Dataset not found: {0}".format(args.full_path)) + print("Dataset not found: {0}".format(full_path)) print("Tip: use the --create-dataset flag " "to create one from a template") sys.exit(1) diff --git a/solvebio/cli/main.py b/solvebio/cli/main.py index 2805c575..2111c42b 100644 --- a/solvebio/cli/main.py +++ b/solvebio/cli/main.py @@ -90,18 +90,28 @@ class SolveArgumentParser(argparse.ArgumentParser): 'Options are "append" (default) or "overwrite".' }, { - 'name': 'full_path', - 'help': 'The full path to the dataset in the following format: "domain:vault:/path/variants"' # noqa + 'flags': '--full-path', + 'help': 'The full path to the dataset in the format: ' + '"domain:vault:/path/dataset". ' + 'Overrides --vault and --path' + }, + { + 'flags': '--vault', + 'help': + 'The vault containing the dataset (use with --path). ' + 'Defaults to your personal vault. ' + 'Overridden by --full-path' + }, + { + 'flags': '--path', + 'help': 'The path to the dataset (relative to the vault). ' + 'Used with --vault, overridden by --full-path' }, { 'name': 'file', 'help': 'One or more local files to import', 'nargs': '+' }, - { - 'flags': '--vault', - 'help': '[Deprecated] Pass vault path as part of "full_path"' # noqa - } ] }, 'create-dataset': { @@ -131,20 +141,22 @@ class SolveArgumentParser(argparse.ArgumentParser): 'medium (<500M), large (>=500M)' }, { - 'name': 'full_path', - 'help': 'The full path to the dataset in the following format: ::' # noqa + 'flags': '--full-path', + 'help': 'The full path to the dataset in the format: ' + '"domain:vault:/path/dataset". ' + 'Overrides --vault and --path' }, { 'flags': '--vault', - 'help': '[Deprecated] Pass vault path as part of "full_path"' # noqa + 'help': + 'The vault containing the dataset (use with --path). ' + 'Defaults to your personal vault. ' + 'Overridden by --full-path' }, { 'flags': '--path', - 'help': '[Deprecated] Pass path as part of "full_path"' # noqa - }, - { - 'flags': '--genome-build', - 'help': '[Deprecated] This parameter is no longer in use."' # noqa + 'help': 'The path to the dataset (relative to the vault). ' + 'Used with --vault, overridden by --full-path' }, ] }, @@ -154,19 +166,28 @@ class SolveArgumentParser(argparse.ArgumentParser): 'arguments': [ { 'flags': '--full-path', - 'default': '/', 'help': 'The full path where the files and folders should ' - 'be created, defaults to ":/"', + 'be created, defaults to the root of your personal vault. ' + 'Overrides --vault and --path' + }, + { + 'flags': '--vault', + 'help': 'The vault where the files will be uploaded. ' + 'Defaults to your personal vault. ' + 'Overridden by --full-path' + }, + { + 'flags': '--path', + 'help': 'The path (relative to a vault) ' + 'where the files will be uploaded. ' + 'Defaults to the root directory (/). ' + 'Overridden by --full-path' }, { 'name': 'local_path', 'help': 'The path to the local file or directory ' 'to upload', 'nargs': '+' - }, - { - 'flags': '--path', - 'help': '[Deprecated] Use --full-path' } ] }, From c3c4383d53fefc642203e2de0db099e89351a38a Mon Sep 17 00:00:00 2001 From: David Caplan Date: Thu, 11 Jan 2018 09:54:35 -0500 Subject: [PATCH 18/20] try refactoring path validators with regex (#218) * refactor full path validators with regex * fix issues with regexes and overrides; use explicit function names * switch to tilde for personal vault --- solvebio/cli/data.py | 49 ++++-------- solvebio/cli/main.py | 39 +++++---- solvebio/resource/dataset.py | 4 +- solvebio/resource/object.py | 136 ++++++++++++++++++++------------ solvebio/resource/vault.py | 83 ++++++++++++++----- solvebio/test/test_object.py | 69 +++++++++++++--- solvebio/test/test_shortcuts.py | 2 +- solvebio/test/test_vault.py | 30 +++++-- 8 files changed, 269 insertions(+), 143 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index 55ec363e..3d897a8e 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -16,32 +16,12 @@ from solvebio.errors import ObjectTypeError, NotFoundError -def _full_path_from_args(args): - """ - Handles the following args: - - * full-path - * vault - * path - - Always uses "full_path" if provided (overrides vault and path). - Otherwise, attempts to use "vault" and "path". - - If no paths are specified, defaults to personal-vault:/ - """ - if args.full_path: - return args.full_path - - return '{0}:{1}'.format( - args.vault or Vault.get_personal_vault().name, - args.path or '/') - - def _assert_object_type(obj, object_type): if obj.object_type != object_type: - raise ObjectTypeError('{0} is a {1} but must be a folder'.format( + raise ObjectTypeError('{0} is a {1} but must be a {2}'.format( obj.path, obj.object_type, + object_type )) @@ -50,14 +30,14 @@ def _upload_folder(domain, vault, base_remote_path, # Create the upload root folder if it does not exist on the remote try: - upload_root_path, _ = Object.validate_path( + upload_root_path, _ = Object.validate_full_path( os.path.join(base_remote_path, local_start) ) obj = Object.get_by_full_path(upload_root_path) _assert_object_type(obj, 'folder') except NotFoundError: base_remote_path, path_dict = \ - Object.validate_path(base_remote_path) + Object.validate_full_path(base_remote_path) if path_dict['path'] == '/': parent_object_id = None @@ -143,16 +123,18 @@ def create_dataset(args): """ Attempt to create a new dataset given the following params: - * full_path (or vault & path) * template_id * template_file * capacity * create_vault + * [argument] dataset name or full path NOTE: genome_build has been deprecated and is no longer used. """ - full_path = _full_path_from_args(args) + # TODO: Support for a parent object path argument? + full_path, path_dict = Object.validate_full_path( + args.full_path, vault=args.vault, path=args.path) # Accept a template_id or a template_file if args.template_id: @@ -214,13 +196,11 @@ def upload(args): Given a folder or file, upload all the folders and files contained within it, skipping ones that already exist on the remote. """ - - full_path = _full_path_from_args(args) - base_remote_path, path_dict = Object.validate_path(full_path) - vault_path = path_dict['domain'] + ':' + path_dict['vault'] + base_remote_path, path_dict = Object.validate_full_path( + args.full_path, vault=args.vault, path=args.path) # Assert the vault exists and is accessible - vault = Vault.get_by_full_path(vault_path) + vault = Vault.get_by_full_path(path_dict['vault_full_path']) # If not the vault root, validate remote path exists and is a folder if path_dict['path'] != '/': @@ -247,7 +227,9 @@ def import_file(args): * create_dataset * template_id - * full_path (or vault & path) + * full_path + * vault (optional, overrides the vault in full_path) + * path (optional, overrides the path in full_path) * commit_mode * capacity * file (list) @@ -258,7 +240,8 @@ def import_file(args): if not solvebio.api_key: solvebio.login() - full_path = _full_path_from_args(args) + full_path, path_dict = Object.validate_full_path( + args.full_path, vault=args.vault, path=args.path) # Ensure the dataset exists. Create if necessary. if args.create_dataset: diff --git a/solvebio/cli/main.py b/solvebio/cli/main.py index 2111c42b..737cb308 100644 --- a/solvebio/cli/main.py +++ b/solvebio/cli/main.py @@ -97,15 +97,14 @@ class SolveArgumentParser(argparse.ArgumentParser): }, { 'flags': '--vault', - 'help': - 'The vault containing the dataset (use with --path). ' + 'help': 'The vault containing the dataset. ' 'Defaults to your personal vault. ' - 'Overridden by --full-path' + 'Overrides the vault component of --full-path' }, { 'flags': '--path', - 'help': 'The path to the dataset (relative to the vault). ' - 'Used with --vault, overridden by --full-path' + 'help': 'The path to the dataset (relative to a vault). ' + 'Overrides the path component of --full-path' }, { 'name': 'file', @@ -140,23 +139,25 @@ class SolveArgumentParser(argparse.ArgumentParser): 'small (default, <100M records), ' 'medium (<500M), large (>=500M)' }, - { - 'flags': '--full-path', - 'help': 'The full path to the dataset in the format: ' - '"domain:vault:/path/dataset". ' - 'Overrides --vault and --path' - }, { 'flags': '--vault', 'help': - 'The vault containing the dataset (use with --path). ' - 'Defaults to your personal vault. ' - 'Overridden by --full-path' + 'The vault containing the dataset. ' + 'Overrides the vault component of the full path argument' }, { 'flags': '--path', 'help': 'The path to the dataset (relative to the vault). ' - 'Used with --vault, overridden by --full-path' + 'Overrides the path component of the full path argument' + }, + { + 'name': 'full_path', + 'help': 'The full path to the dataset in the format: ' + '"domain:vault:/path/dataset". ' + 'Defaults to your personal vault if no vault is provided. ' + 'Defaults to the vault root if no path is provided. ' + 'Override the vault with --vault ' + 'and/or the path with --path' }, ] }, @@ -167,21 +168,19 @@ class SolveArgumentParser(argparse.ArgumentParser): { 'flags': '--full-path', 'help': 'The full path where the files and folders should ' - 'be created, defaults to the root of your personal vault. ' - 'Overrides --vault and --path' + 'be created, defaults to the root of your personal vault' }, { 'flags': '--vault', 'help': 'The vault where the files will be uploaded. ' 'Defaults to your personal vault. ' - 'Overridden by --full-path' + 'Overrides the vault component of --full-path' }, { 'flags': '--path', 'help': 'The path (relative to a vault) ' 'where the files will be uploaded. ' - 'Defaults to the root directory (/). ' - 'Overridden by --full-path' + 'Overrides the path component of --full-path' }, { 'name': 'local_path', diff --git a/solvebio/resource/dataset.py b/solvebio/resource/dataset.py index 2720223f..c95684f3 100644 --- a/solvebio/resource/dataset.py +++ b/solvebio/resource/dataset.py @@ -61,7 +61,7 @@ def make_full_path(cls, vault_name, path, name, **kwargs): def get_by_full_path(cls, full_path, **kwargs): from solvebio import Object _client = kwargs.pop('client', None) or cls._client or client - test_path, _ = Object.validate_path(full_path, client=_client) + test_path, _ = Object.validate_full_path(full_path, client=_client) obj = Object.get_by_full_path(test_path, client=_client) dataset = Dataset.retrieve(obj['dataset_id'], client=_client, **kwargs) return dataset @@ -83,7 +83,7 @@ def get_or_create_by_full_path(cls, full_path, **kwargs): pass # Dataset not found, create it step-by-step - full_path, parts = Object.validate_path(full_path, client=_client) + full_path, parts = Object.validate_full_path(full_path, client=_client) if create_vault: vault = Vault.get_or_create_by_full_path( diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index 4f57463c..f1cea87f 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -42,67 +42,103 @@ class Object(CreateableAPIResource, ('description', 'Description'), ) + # Regex describing an object path. + PATH_RE = re.compile(r'^[^\/]*(?P(\/[^\/]*)+)$') + @classmethod - def validate_path(cls, path, **kwargs): - """ Helper method to return a full path + def validate_full_path(cls, full_path, **kwargs): + """Helper method to parse a full or partial path and + return a full path as well as a dict containing path parts. + + Uses the following rules when processing the path: + + * If no domain, uses the current user's account domain + * If no vault, uses the current user's personal vault. + * If no path, uses '/' (vault root) + + Returns a tuple containing: + + * The validated full_path + * A dictionary with the components: + * domain: the domain of the vault + * vault: the name of the vault, without domain + * vault_full_path: domain:vault + * path: the object path within the vault + * parent_path: the parent path to the object + * filename: the object's filename (if any) + * full_path: the validated full path + + The following components may be overridden using kwargs: + + * vault + * path + + Object paths (also known as "paths") must begin with a forward slash. + + The following path formats are supported: + + domain:vault:/path -> object "path" in the root of "domain:vault" + domain:vault/path -> object "path" in the root of "domain:vault" + vault:/path -> object "path" in the root of "vault" + vault/path -> object "path" in the root of "vault" + ~/path -> object "path" in the root of personal vault + vault/ -> root of "vault" + ~/ -> root of your personal vault + + The following two formats are not supported: + + path -> invalid/ambiguous path (exception) + vault:path -> invalid/ambiguous path (exception) + vault:path/path -> unsupported, interpreted as domain:vault/path - If no account_domain, assumes user's account domain - If no vault, uses personal vault. - If no path, uses / """ from solvebio.resource.vault import Vault _client = kwargs.pop('client', None) or cls._client or client - # Remove double slashes and leading ':' - path = re.sub('//+', '/', path.lstrip(':')) - - parts = path.split(':', 2) - if len(parts) == 3: - account_domain, vault_name, object_path = parts - elif len(parts) == 2: - # if no slash assume user means root - if '/' not in parts[1]: - account_domain, vault_name = parts - object_path = '/' - else: - # if second part begins with slash, assume missing domain - if parts[1][0] == '/': - account_domain = \ - _client.get('/v1/user', {})['account']['domain'] - vault_name, object_path = parts - else: - raise Exception('Full path must be of the format: ' - '"vault_name:/object_path" or ' - '"account_domain:vault_name:/object_path"') - # TODO maybe no good - # assumes missing ":" between vault and path - # mydomain:myvault/here/is/path - # myvault:here/is/path - # account_domain = parts[0] - # vault_name, object_path = parts[1].split('/', 1) + if not full_path: + raise Exception( + 'Invalid path: ', + 'Full path must be in one of the following formats: ' + '"vault:/path", "domain:vault:/path", or "~/path"') + + # Parse the vault's full_path, using overrides if any + input_vault = kwargs.get('vault') or full_path + try: + vault_full_path, path_dict = \ + Vault.validate_full_path(input_vault, client=_client) + except: + raise Exception( + 'Could not determine vault from "{0}". ' + 'Full path must be in one of the following formats: ' + '"vault:/path", "domain:vault:/path", or "~/path"' + .format(input_vault)) + + if kwargs.get('path'): + # Allow override of the object_path. + full_path = '{0}:/{1}'.format(vault_full_path, kwargs['path']) + + match = cls.PATH_RE.match(full_path) + if match: + object_path = match.groupdict()['path'] else: - # if slash assume user means private vault - if '/' in parts[0]: - vault = Vault.get_personal_vault(client=_client) - account_domain, vault_name = vault.full_path.split(':') - object_path = parts[0] - else: - raise Exception('Full path must be of the format: ' - '"vault_name:/object_path" or ' - '"account_domain:vault_name:/object_path"') - - if object_path[0] != '/': - object_path = '/' + object_path - - # Strip trailing slash + raise Exception( + 'Cannot find a valid object path in "{0}". ' + 'Full path must be in one of the following formats: ' + '"vault:/path", "domain:vault:/path", or "~/path"' + .format(full_path)) + + # Remove double slashes + object_path = re.sub('//+', '/', object_path) if object_path != '/': + # Remove trailing slash object_path = object_path.rstrip('/') - full_path = ':'.join([account_domain, vault_name, object_path]) - return full_path, dict(domain=account_domain, - vault=vault_name, - path=object_path) + path_dict['path'] = object_path + # TODO: parent_path and filename + full_path = '{domain}:{vault}:{path}'.format(**path_dict) + path_dict['full_path'] = full_path + return full_path, path_dict @classmethod def get_by_full_path(cls, full_path, **params): diff --git a/solvebio/resource/vault.py b/solvebio/resource/vault.py index 3de835d8..fb64e95f 100644 --- a/solvebio/resource/vault.py +++ b/solvebio/resource/vault.py @@ -8,6 +8,8 @@ from .apiresource import UpdateableAPIResource from .apiresource import DeletableAPIResource +import re + class Vault(CreateableAPIResource, ListableAPIResource, @@ -31,6 +33,14 @@ class Vault(CreateableAPIResource, ('description', 'Description'), ) + # Regex describing Vault full path. + # NOTE: Not valid for object full paths. + VAULT_PATH_RE = re.compile( + # Non-greedy wildcard match for domain + r'^(?:(?P[a-zA-Z0-9\-]+)\:)??' + # Match vault or vault:/ or vault/ + r'(?P[^\/:]+)(?:\:?\/.*)?$') + def __init__(self, vault_id, **kwargs): super(Vault, self).__init__(vault_id, **kwargs) @@ -45,34 +55,65 @@ def _object_list_helper(self, **params): return items @classmethod - def validate_path(cls, path, **kwargs): - """ Helper method to return a full path + def validate_full_path(cls, full_path, **kwargs): + """Helper method to return a full path from a full or partial path. + + If no domain, assumes user's account domain + If the vault is "~", assumes personal vault. + + Valid vault paths include: + + domain:vault + domain:vault:/path + domain:vault/path + vault:/path + vault + ~/ + + Invalid vault paths include: + + /vault/ + /path + / + :/ - If no account_domain, assumes user's account domain - If no vault, uses personal vault. + Does not allow overrides for any vault path components. """ _client = kwargs.pop('client', None) or cls._client or client - parts = path.split(':') - if not path or len(parts) > 2: + full_path = full_path.strip() + if not full_path: raise Exception( - 'Vault path "{}" invalid. Must be of the format: ' - '"account_domain:vault_name".'.format(path) + 'Vault path "{0}" is invalid. Path must be in the format: ' + '"domain:vault:/path" or "vault:/path".'.format(full_path) ) - elif len(parts) == 2: - account_domain, vault_name = parts - else: - user = _client.get('/v1/user', {}) - account_domain = user['account']['domain'] - vault_name = path - # Strip any paths from the vault_name - if '/' in vault_name: - vault_name = vault_name.split('/', 1)[0] + match = cls.VAULT_PATH_RE.match(full_path) + if not match: + raise Exception( + 'Vault path "{0}" is invalid. Path must be in the format: ' + '"domain:vault:/path" or "vault:/path".'.format(full_path) + ) + path_parts = match.groupdict() + + # Handle the special case where "~" means personal vault + if path_parts.get('vault') == '~': + path_parts = dict(domain=None, vault=None) - full_path = ':'.join([account_domain, vault_name]) - return full_path, dict(domain=account_domain, - vault=vault_name) + # If any values are None, set defaults from the user. + if None in path_parts.values(): + user = _client.get('/v1/user', {}) + defaults = { + 'domain': user['account']['domain'], + 'vault': 'user-{0}'.format(user['id']) + } + path_parts = dict((k, v or defaults.get(k)) + for k, v in path_parts.items()) + + # Rebuild the full path + full_path = '{domain}:{vault}'.format(**path_parts) + path_parts['vault_full_path'] = full_path + return full_path, path_parts def files(self, **params): return self._object_list_helper(object_type='file', **params) @@ -129,7 +170,7 @@ def search(self, query, **params): @classmethod def get_by_full_path(cls, full_path, **kwargs): _client = kwargs.pop('client', None) or cls._client or client - full_path, parts = cls.validate_path(full_path) + full_path, parts = cls.validate_full_path(full_path) return Vault._retrieve_helper( 'vault', 'name', full_path, account_domain=parts['domain'], diff --git a/solvebio/test/test_object.py b/solvebio/test/test_object.py index 2595df87..869da634 100644 --- a/solvebio/test/test_object.py +++ b/solvebio/test/test_object.py @@ -10,7 +10,8 @@ def test_object_paths(self): vaults = self.client.Vault.all() for vault in vaults: for file_ in list(vault.ls().solve_objects())[:5]: - o_path, _ = self.client.Object.validate_path(file_.full_path) + o_path, _ = self.client.Object.validate_full_path( + file_.full_path) self.assertEqual(o_path, file_.full_path) def test_object_path_cases(self): @@ -19,24 +20,72 @@ def test_object_path_cases(self): domain = user.account.domain user_vault = '{0}:user-{1}'.format(domain, user.id) test_cases = [ - ['{0}:myVault'.format(domain), '{0}:myVault:/'.format(domain)], - ['acme:myVault', 'acme:myVault:/'], ['acme:myVault:/uploads_folder', 'acme:myVault:/uploads_folder'], + ['acme:myVault/folder1/project: ABCD', + 'acme:myVault:/folder1/project: ABCD'], + ['myVault/folder1/project: ABCD', + '{0}:myVault:/folder1/project: ABCD'.format(domain)], ['myVault:/uploads_folder', '{0}:myVault:/uploads_folder'.format(domain)], # noqa - ['/uploads_folder', '{0}:/uploads_folder'.format(user_vault)], - [':/uploads_folder', '{0}:/uploads_folder'.format(user_vault)], - ['myVault/uploads_folder', '{0}:/myVault/uploads_folder'.format(user_vault)], # noqa - ['oops:myDomain:myVault', 'oops:myDomain:/myVault'], + # New full path formats + ['~/uploads_folder', '{0}:/uploads_folder'.format(user_vault)], + ['~/', '{0}:/'.format(user_vault)], + ['myVault/uploads_folder', '{0}:myVault:/uploads_folder'.format(domain)], # noqa + ['acme:myVault/uploads_folder', 'acme:myVault:/uploads_folder'], ] for case, expected in test_cases: - p, _ = self.client.Object.validate_path(case) + p, _ = self.client.Object.validate_full_path(case) self.assertEqual(p, expected) error_test_cases = [ '', + '/hello', 'myVault', - 'acme:myVault/uploads_folder' + 'oops:myDomain:myVault', + '{0}:myVault'.format(domain), + 'acme:myVault' ] for case in error_test_cases: with self.assertRaises(Exception): - v, v_paths = self.client.Object.validate_path(case) + v, v_paths = self.client.Object.validate_full_path(case) + + def test_object_path_cases_with_overrides(self): + user = self.client.User.retrieve() + domain = user.account.domain + user_vault = '{0}:user-{1}'.format(domain, user.id) + + test_cases = [ + 'acme:myVault:/folder', + 'myVault:/folder', + 'myVault/folder', + '/folder', + ] + + for case in test_cases: + p, _ = self.client.Object.validate_full_path(case, vault='foobar') + expected = '{0}:foobar:/folder'.format(domain) + self.assertEqual(p, expected) + + p, _ = self.client.Object.validate_full_path(case, vault='foo:bar') + expected = 'foo:bar:/folder' + self.assertEqual(p, expected) + + p, _ = self.client.Object.validate_full_path( + case, vault='foo:bar', path='/baz') + expected = 'foo:bar:/baz' + self.assertEqual(p, expected) + + p, _ = self.client.Object.validate_full_path( + case, vault='foo:bar', path='/baz/bon') + expected = 'foo:bar:/baz/bon' + self.assertEqual(p, expected) + + # Test cases where just the path changes + case = 'acme:myVault:/folder' + p, _ = self.client.Object.validate_full_path(case, path='foo/bar/baz') + expected = 'acme:myVault:/foo/bar/baz' + self.assertEqual(p, expected) + + case = '~/folder' + p, _ = self.client.Object.validate_full_path(case, path='foo/bar/baz') + expected = '{0}:/foo/bar/baz'.format(user_vault) + self.assertEqual(p, expected) diff --git a/solvebio/test/test_shortcuts.py b/solvebio/test/test_shortcuts.py index 30639038..e60d54df 100644 --- a/solvebio/test/test_shortcuts.py +++ b/solvebio/test/test_shortcuts.py @@ -35,7 +35,7 @@ def test_create_dataset(self, DatasetCreate, ObjectAll, VaultAll): ObjectAll.side_effect = fake_object_all VaultAll.side_effect = fake_vault_all args = ['create-dataset', 'solvebio:test_vault:/test-dataset', - '--capacity', 'small'] # noqa + '--capacity', 'small'] ds = main.main(args) self.assertEqual(ds.name, 'test-dataset') self.assertEqual(ds.path, '/test-dataset') diff --git a/solvebio/test/test_vault.py b/solvebio/test/test_vault.py index d5c0c6c3..6d1564e8 100644 --- a/solvebio/test/test_vault.py +++ b/solvebio/test/test_vault.py @@ -28,29 +28,47 @@ def test_vaults(self): self.assertTrue(f in vault, '{0} field is present'.format(f)) def test_vault_paths(self): + user = self.client.User.retrieve() + domain = user.account.domain + user_vault = 'user-{0}'.format(user.id) + vaults = self.client.Vault.all() for vault in vaults: - v, v_paths = self.client.Vault.validate_path(vault.full_path) + v, v_paths = self.client.Vault.validate_full_path(vault.full_path) self.assertEqual(v, vault.full_path) - domain = self.client.User.retrieve().account.domain test_cases = [ + ['myVault/', '{0}:myVault'.format(domain, user_vault)], ['myVault', '{0}:myVault'.format(domain)], ['{0}:myVault'.format(domain), '{0}:myVault'.format(domain)], ['acme:myVault', 'acme:myVault'], - # this assumes user f-ed and forgot the semi-colon for path + ['myVault/folder1/folder2: xyz', '{0}:myVault'.format(domain)], + ['acme:myVault/folder1/folder2: xyz', 'acme:myVault'], + ['acme:myVault:/folder1/folder2: xyz', 'acme:myVault'], + # The following are the "new" vault/path formats: + ['~/', '{0}:{1}'.format(domain, user_vault)], ['acme:myVault/uploads_folder', 'acme:myVault'], ['myVault/uploads_folder', '{0}:myVault'.format(domain)], ] for case, expected in test_cases: - v, v_paths = self.client.Vault.validate_path(case) + v, v_paths = self.client.Vault.validate_full_path(case) self.assertEqual(v, expected) error_test_cases = [ '', - 'myDomain:myVault:/the/heack', + '/', + ':', + ':/', + '::/', + 'x:', + # Underscore in domain + 'my_Domain:myVault:/the/heack', + # Space in domain + 'my Domain:my:Vault:/the/heack', + # Too many colons + 'myDomain:my:Vault:/the/heack', 'oops:myDomain:myVault', ] for case in error_test_cases: with self.assertRaises(Exception): - v, v_paths = self.client.Vault.validate_path(case) + v, v_paths = self.client.Vault.validate_full_path(case) From 26abed742d8ef21eca03659bcb455c4ff1211375 Mon Sep 17 00:00:00 2001 From: David Caplan Date: Thu, 11 Jan 2018 10:23:34 -0500 Subject: [PATCH 19/20] update readme --- README.md | 332 +++++------------------------------------------------- 1 file changed, 27 insertions(+), 305 deletions(-) diff --git a/README.md b/README.md index 72e674ab..6d27f4b5 100644 --- a/README.md +++ b/README.md @@ -7,15 +7,11 @@ SolveBio Python Client This is the SolveBio Python package and command-line interface (CLI). This module has been tested on Python 2.6+, Python 3.1+ and PyPy. -For more information about SolveBio visit [solvebio.com](https://www.solvebio.com). - - -Compatibility -------------- - This version of the Python Client is compatible with Vault-based datasets only (released on July 28th, 2017). +Developer documentation is available at [docs.solvebio.com](https://docs.solvebio.com). For more information about SolveBio visit [www.solvebio.com](https://www.solvebio.com). + Dependencies (Ubuntu) -------------------- @@ -31,46 +27,44 @@ To install them, run: sudo apt-get install libcurl4-gnutls-dev libgnutls-dev -Guided Installation -------------------- +Installation & Setup +-------------------- -To use our guided installer, open up your terminal paste this: +Install `solvebio` using `pip`: - curl -skL install.solvebio.com/python | bash + pip install solvebio +For interactive use, we recommend installing `IPython` and `gnureadline`: -Manual Installation -------------------- + pip install ipython + pip install gnureadline -You may want to first install `gnureadline` and `IPython`: - pip install gnureadline - pip install ipython +To log in, type: + solvebio login -Install `solvebio` using `pip`: - pip install solvebio +Enter your SolveBio credentials and you should be good to go! -To log in, type: +Automatic Installer (Python 2.x only) +------------------------------------- - solvebio login +NOTE: The following installer works for Python 2.x only. -Enter your credentials and you should be good to go! +Open your terminal and paste the following command: -Just type `solvebio` to enter the SolveBio Python shell or `solvebio tutorial` -for a quick guide on using SolveBio. + curl -skL install.solvebio.com/python | bash -Installing from Git -------------------- +Install from Git +---------------- pip install -e git+https://github.com/solvebio/solvebio-python.git#egg=solve - Development ----------- @@ -78,294 +72,22 @@ Development cd solve-python/ python setup.py develop -To run tests use nosetest +To run tests use `nosetest`: nosetests solvebio.test.test_dataset -Or install tox and run that: + +Or install `tox` and run: pip install tox tox -To tag new versions: - - git tag `cat solvebio/version.py | cut -d "'" -f 2` - git push --tags origin master - - -Migrating to Version 2 ----------------------- - -Version 2 of the Python client removes support for the `Depository` and -`DepositoryVersion` classes, and adds support for the `Vault` and `Object` -classes. - -A vault is similar to a filesystem in that it provides a folder-based -hierarchy in which additional folders, files, and SolveBio Datasets can be -stored. The folders, files, and SolveBio Datasets in a vault are -collectively referred to as "objects" and can be accessed using the -`Vault` or `Object` classes. - -Vaults have an advanced permission model that provides for three different -levels of access: read, write, and admin. Permissions are settable through -the SolveBio UI. For detailed information on the permission model, please -visit this link: - -https://support.solvebio.com/hc/en-us/articles/227732207 - -As part of the migration onto Version 2, SolveBio has automatically applied -the permissions set on Depositories to the new Vaults which we have created to -replace them. - -It is likely that any scripts you have written which utilize the -Python client will need to be modified to be compatible with Version 2. -Below is an exhaustive list of all the things that have changed in the -user-facing methods of the client. If you encounter any issues migrating -your code, please submit a support ticket and we would be happy to assist you. - -### Naming Conventions - -It is useful to know the different names for the various entities (or combined -entities) that are available via the Client. The naming conventions are -as follows: - -``` - -solvebio:public:/ClinVar/3.7.0-2015-12-06/Variants-GRCh37 -+------+ -(1) - +----+ - (2) -+-------------+ -(3) - +---------------------------------------+ - (4) - +-------------+ - (5) -+-------------------------------------------------------+ -(6) -``` -``` -(1) - Account Domain -(2) - Vault Name -(3) - Vault Full Path -(4) - Object Path -(5) - Object Filename -(6) - Object Full Path - -``` - -### Changes in V2 - -1. Dataset creation changes - -``` -Old: Dataset.get_or_create_by_full_name(full_name) -New: Dataset.get_or_create_by_full_path(account_domain:vault_name:/parent/path/dataset_name) -``` - -For example, if you belong to the "acme" domain, then to create a dataset -named named "EGFR_analysis" in the "/July-2017" folder of the "Research" vault, -make the following call: - -``` -Dataset.get_or_create_by_full_path('Research:/July_2017/EGFR_analysis') -Dataset.get_or_create_by_full_path('Acme:Research:/July_2017/EGFR_analysis') -``` -If you wish to auto-create the vault, add the `create_vault=True` flag. -If you wish to auto-create the folder(s), add the `create_folders=True` flag. +Support +------- -You can optionally leave off the account domain in front, but note that this -will not work if your object path includes a colon: +Developer documentation is available at [docs.solvebio.com](https://docs.solvebio.com). -``` -Dataset.get_or_create_by_full_path('Research:/July_2017/EGFR_analysis') -``` +If you experience problems with this package, please [create a GitHub Issue](https://github.com/solvebio/solvebio-python/issues). -If you wish to automatically create the vault if it does not exist, add the -`create_vault=True` flag. - -2. Dataset retrieval changes - -A dataset's "full_path" is a triplet consisting of account domain, vault -name, and the dataset's path in the vault (see above). Retrieval of a dataset -by its full path can be performed in a single call: - -``` -Dataset.get_by_full_path("account_domain:vault_name:object_path") -Dataset.get_by_full_path("solvebio:public:/ICGC/3.0.0-23/Donor") -``` - -In order to get the full path of an existing dataset, search for datasets -within a vault. - -``` -# Get all of the Clinvar datasets that are version 3 and above -v = Vault.get_by_full_path('solvebio:public') -v.datasets(query='Clinvar/3') -``` - -3. Removal of `genome_build` filter - -The `genome_build` field on the Dataset entity is no longer a supported -filter. The genome build of public datasets is now indicated in the dataset -name, e.g. `Variants-GRCh38`. - -``` -Dataset.get_by_full_path("solvebio:public:/ClinVar/3.7.0-2015-12-06/Variants-GRCh38") -``` - -4. Removal of `Depository` and `DepositoryVersion` classes. - -`Depository` has been replaced by the `Vault` class. - -`DepositoryVersion` was functionality is now provided by the `Object` class. -Objects are files, folders, or SolveBio -Datasets that exist inside a vault. As part of your account's migration onto -Version 2 of SolveBio, we have automatically moved datasets located in -Depository "X" and DepositoryVersion "Y" to a Vault named "X" and a folder named -"Y". If the dataset being migrated had the `genome_build` property set, the -dataset was renamed to `$original_name-$genome_build`. Otherwise, the name -remained unchanged. - -5. Renaming of "objects" to "solve_objects" - -The `objects` property of a resource has been renamed `solve_objects`. - -6. The `import` and `create-dataset` command-line utilities now require -`--full-path` arguments. The `dataset` argument (`test-dataset` -below) no longer can contain slashes. - -``` -create-dataset --capacity=small --full-path=acme:test:/examples test-dataset -``` - -7. Removal of DatasetCommit approval. The `auto_approve`, `is_approved` and -`approved_by` attributes have been removed. The `/approve` endpoint has also -been removed. All commits will be approved automatically. - - -Vault Browsing --------------- -### Browse - -List all the vaults you currently have access to. - - -``` -Vault.all() -``` - -### Your Personal Vault - -Each user has a personal vault that is accessible to that user only. Other -users cannot list the contents of this vault, cannot access the objects -contained in it, and cannot modify it in any way. To provide access to -objects stored in your personal vault, you must copy the objects into a -different vault. - -Your personal dataset can be retrieved using the following method: - -``` -Vault.get_personal_vault() -``` - - -### Shortcuts -Browsing the contents of a vault can be easily performed using the following -shortcuts. - -First, retrieve a vault: - -``` -vault = Vault.get_personal_vault() -vault = Vault.get_by_full_path('solvebio:public') -vault = Vault.get_by_full_path('your_account_domain:vault_name') -vault = Vault.get_by_full_path('vault_name') # Searches inside your account domain -``` - - -Then, call the appropriate method: - -``` -vault.files() -vault.folders() -vault.datasets() -vault.objects() # Includes files, folders, and datasets - -vault.files(filename='hello.txt') # Can pass filters to all of these methods -``` - - -Search for files, folders, and datasets in a vault using the `search` method: - -``` -vault.search('hello') -vault.search('hello', object_type='folder') -vault.search('hello', object_type='file') -vault.search('hello', object_type='dataset') -``` - -Creation --------- -``` -Vault.get_or_create_by_full_path('acme:test1') -Vault.get_or_create_by_full_path('test1') -``` - -File Upload ------------ -``` -v = Vault.get_personal_vault() -v.upload_file('analysis.tsv', '/') ->>> Notice: Successfully uploaded analysis.tsv to /analysis.tsv -``` - -Re-uploading the same file to the same path auto-increments the filename on -the server. This is required because no two objects can have the same full -path. - -``` -v = Vault.get_personal_vault() -v.upload_file('analysis.tsv', '/') ->>> Notice: Successfully uploaded analysis.tsv to /analysis-1.tsv -``` - - -Deletion --------- - -Deletion of any object requires a confirmation from the user. -You can disable this confirmation by passing the `force=True` flag. - -``` -folder = Object.retrieve(504311238004931284) -folder.delete() ->>> Are you sure you want to delete this object? [y/N] n ->>> Not performing deletion. -``` -``` -folder.delete(force=True) -``` - -Enhanced Command-Line File Uploading ------------------------------------- - -A new command-line method called "upload" has been added. This method -allows users to upload a file or folder to a vault. If a folder is -uploaded, calling the "upload" method again will result in a cross-checking -of the local folder and SolveBio folder, and upload/create only the -local files and folders that do not already exist on SolveBio. - -``` -solvebio upload --vault analysis --path=/july_2017 local/foo/bar -``` - -This command will create a folder named `/july_2017/bar` in the `analysis` -vault, and upload everything inside `local/foo/bar` on the local machine to -`/july_2017/bar` in that vault. - -Note that comparison is performed by filename, not by file content. Thus, the -"upload" command will never replace a remote file with a local file of the same -name but with updated contents. +For all other requests, please [email SolveBio Support](mailto:support@solvebio.com). From a6e4fd7d69b45f65834e089a86ac0a8f9f0f50d3 Mon Sep 17 00:00:00 2001 From: David Caplan Date: Thu, 11 Jan 2018 10:45:08 -0500 Subject: [PATCH 20/20] support backwards compatible create-dataset command --- solvebio/cli/data.py | 18 +++++++++++------- solvebio/test/test_shortcuts.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/solvebio/cli/data.py b/solvebio/cli/data.py index 3d897a8e..f7cd3668 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -132,9 +132,17 @@ def create_dataset(args): NOTE: genome_build has been deprecated and is no longer used. """ - # TODO: Support for a parent object path argument? - full_path, path_dict = Object.validate_full_path( - args.full_path, vault=args.vault, path=args.path) + # For backwards compatibility, the "full_path" argument + # can be a dataset filename, but only if vault and path + # are set. If vault/path are both provided and there + # are no forward-slashes in the "full_path", assume + # the user has provided a dataset filename. + if '/' not in args.full_path and args.vault and args.path: + full_path, path_dict = Object.validate_full_path( + '{0}:/{1}/{2}'.format(args.vault, args.path, args.full_path)) + else: + full_path, path_dict = Object.validate_full_path( + args.full_path, vault=args.vault, path=args.path) # Accept a template_id or a template_file if args.template_id: @@ -236,10 +244,6 @@ def import_file(args): * follow (default: False) """ - # FIXME: Does this need to be here? What about other commands? - if not solvebio.api_key: - solvebio.login() - full_path, path_dict = Object.validate_full_path( args.full_path, vault=args.vault, path=args.path) diff --git a/solvebio/test/test_shortcuts.py b/solvebio/test/test_shortcuts.py index e60d54df..7bb29bea 100644 --- a/solvebio/test/test_shortcuts.py +++ b/solvebio/test/test_shortcuts.py @@ -40,6 +40,22 @@ def test_create_dataset(self, DatasetCreate, ObjectAll, VaultAll): self.assertEqual(ds.name, 'test-dataset') self.assertEqual(ds.path, '/test-dataset') + @mock.patch('solvebio.resource.Vault.all') + @mock.patch('solvebio.resource.Object.all') + @mock.patch('solvebio.resource.Dataset.create') + def test_create_dataset_by_filename(self, DatasetCreate, ObjectAll, + VaultAll): + DatasetCreate.side_effect = fake_dataset_create + ObjectAll.side_effect = fake_object_all + VaultAll.side_effect = fake_vault_all + args = ['create-dataset', 'test-dataset-filename', + '--vault', 'solvebio:test_vault', + '--path', '/', + '--capacity', 'small'] + ds = main.main(args) + self.assertEqual(ds.name, 'test-dataset-filename') + self.assertEqual(ds.path, '/test-dataset-filename') + def _validate_tmpl_fields(self, fields): for f in fields: if f.name == 'name':