Skip to content

Commit

Permalink
try refactoring path validators with regex (#218)
Browse files Browse the repository at this point in the history
* refactor full path validators with regex
* fix issues with regexes and overrides; use explicit function names
* switch to tilde for personal vault
  • Loading branch information
davecap committed Jan 11, 2018
1 parent d8328d1 commit c3c4383
Show file tree
Hide file tree
Showing 8 changed files with 269 additions and 143 deletions.
49 changes: 16 additions & 33 deletions solvebio/cli/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,12 @@
from solvebio.errors import ObjectTypeError, NotFoundError


def _full_path_from_args(args):
"""
Handles the following args:
* full-path
* vault
* path
Always uses "full_path" if provided (overrides vault and path).
Otherwise, attempts to use "vault" and "path".
If no paths are specified, defaults to personal-vault:/
"""
if args.full_path:
return args.full_path

return '{0}:{1}'.format(
args.vault or Vault.get_personal_vault().name,
args.path or '/')


def _assert_object_type(obj, object_type):
if obj.object_type != object_type:
raise ObjectTypeError('{0} is a {1} but must be a folder'.format(
raise ObjectTypeError('{0} is a {1} but must be a {2}'.format(
obj.path,
obj.object_type,
object_type
))


Expand All @@ -50,14 +30,14 @@ def _upload_folder(domain, vault, base_remote_path,

# Create the upload root folder if it does not exist on the remote
try:
upload_root_path, _ = Object.validate_path(
upload_root_path, _ = Object.validate_full_path(
os.path.join(base_remote_path, local_start)
)
obj = Object.get_by_full_path(upload_root_path)
_assert_object_type(obj, 'folder')
except NotFoundError:
base_remote_path, path_dict = \
Object.validate_path(base_remote_path)
Object.validate_full_path(base_remote_path)

if path_dict['path'] == '/':
parent_object_id = None
Expand Down Expand Up @@ -143,16 +123,18 @@ def create_dataset(args):
"""
Attempt to create a new dataset given the following params:
* full_path (or vault & path)
* template_id
* template_file
* capacity
* create_vault
* [argument] dataset name or full path
NOTE: genome_build has been deprecated and is no longer used.
"""
full_path = _full_path_from_args(args)
# TODO: Support for a parent object path argument?
full_path, path_dict = Object.validate_full_path(
args.full_path, vault=args.vault, path=args.path)

# Accept a template_id or a template_file
if args.template_id:
Expand Down Expand Up @@ -214,13 +196,11 @@ def upload(args):
Given a folder or file, upload all the folders and files contained
within it, skipping ones that already exist on the remote.
"""

full_path = _full_path_from_args(args)
base_remote_path, path_dict = Object.validate_path(full_path)
vault_path = path_dict['domain'] + ':' + path_dict['vault']
base_remote_path, path_dict = Object.validate_full_path(
args.full_path, vault=args.vault, path=args.path)

# Assert the vault exists and is accessible
vault = Vault.get_by_full_path(vault_path)
vault = Vault.get_by_full_path(path_dict['vault_full_path'])

# If not the vault root, validate remote path exists and is a folder
if path_dict['path'] != '/':
Expand All @@ -247,7 +227,9 @@ def import_file(args):
* create_dataset
* template_id
* full_path (or vault & path)
* full_path
* vault (optional, overrides the vault in full_path)
* path (optional, overrides the path in full_path)
* commit_mode
* capacity
* file (list)
Expand All @@ -258,7 +240,8 @@ def import_file(args):
if not solvebio.api_key:
solvebio.login()

full_path = _full_path_from_args(args)
full_path, path_dict = Object.validate_full_path(
args.full_path, vault=args.vault, path=args.path)

# Ensure the dataset exists. Create if necessary.
if args.create_dataset:
Expand Down
39 changes: 19 additions & 20 deletions solvebio/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,14 @@ class SolveArgumentParser(argparse.ArgumentParser):
},
{
'flags': '--vault',
'help':
'The vault containing the dataset (use with --path). '
'help': 'The vault containing the dataset. '
'Defaults to your personal vault. '
'Overridden by --full-path'
'Overrides the vault component of --full-path'
},
{
'flags': '--path',
'help': 'The path to the dataset (relative to the vault). '
'Used with --vault, overridden by --full-path'
'help': 'The path to the dataset (relative to a vault). '
'Overrides the path component of --full-path'
},
{
'name': 'file',
Expand Down Expand Up @@ -140,23 +139,25 @@ class SolveArgumentParser(argparse.ArgumentParser):
'small (default, <100M records), '
'medium (<500M), large (>=500M)'
},
{
'flags': '--full-path',
'help': 'The full path to the dataset in the format: '
'"domain:vault:/path/dataset". '
'Overrides --vault and --path'
},
{
'flags': '--vault',
'help':
'The vault containing the dataset (use with --path). '
'Defaults to your personal vault. '
'Overridden by --full-path'
'The vault containing the dataset. '
'Overrides the vault component of the full path argument'
},
{
'flags': '--path',
'help': 'The path to the dataset (relative to the vault). '
'Used with --vault, overridden by --full-path'
'Overrides the path component of the full path argument'
},
{
'name': 'full_path',
'help': 'The full path to the dataset in the format: '
'"domain:vault:/path/dataset". '
'Defaults to your personal vault if no vault is provided. '
'Defaults to the vault root if no path is provided. '
'Override the vault with --vault '
'and/or the path with --path'
},
]
},
Expand All @@ -167,21 +168,19 @@ class SolveArgumentParser(argparse.ArgumentParser):
{
'flags': '--full-path',
'help': 'The full path where the files and folders should '
'be created, defaults to the root of your personal vault. '
'Overrides --vault and --path'
'be created, defaults to the root of your personal vault'
},
{
'flags': '--vault',
'help': 'The vault where the files will be uploaded. '
'Defaults to your personal vault. '
'Overridden by --full-path'
'Overrides the vault component of --full-path'
},
{
'flags': '--path',
'help': 'The path (relative to a vault) '
'where the files will be uploaded. '
'Defaults to the root directory (/). '
'Overridden by --full-path'
'Overrides the path component of --full-path'
},
{
'name': 'local_path',
Expand Down
4 changes: 2 additions & 2 deletions solvebio/resource/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def make_full_path(cls, vault_name, path, name, **kwargs):
def get_by_full_path(cls, full_path, **kwargs):
from solvebio import Object
_client = kwargs.pop('client', None) or cls._client or client
test_path, _ = Object.validate_path(full_path, client=_client)
test_path, _ = Object.validate_full_path(full_path, client=_client)
obj = Object.get_by_full_path(test_path, client=_client)
dataset = Dataset.retrieve(obj['dataset_id'], client=_client, **kwargs)
return dataset
Expand All @@ -83,7 +83,7 @@ def get_or_create_by_full_path(cls, full_path, **kwargs):
pass

# Dataset not found, create it step-by-step
full_path, parts = Object.validate_path(full_path, client=_client)
full_path, parts = Object.validate_full_path(full_path, client=_client)

if create_vault:
vault = Vault.get_or_create_by_full_path(
Expand Down
136 changes: 86 additions & 50 deletions solvebio/resource/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,67 +42,103 @@ class Object(CreateableAPIResource,
('description', 'Description'),
)

# Regex describing an object path.
PATH_RE = re.compile(r'^[^\/]*(?P<path>(\/[^\/]*)+)$')

@classmethod
def validate_path(cls, path, **kwargs):
""" Helper method to return a full path
def validate_full_path(cls, full_path, **kwargs):
"""Helper method to parse a full or partial path and
return a full path as well as a dict containing path parts.
Uses the following rules when processing the path:
* If no domain, uses the current user's account domain
* If no vault, uses the current user's personal vault.
* If no path, uses '/' (vault root)
Returns a tuple containing:
* The validated full_path
* A dictionary with the components:
* domain: the domain of the vault
* vault: the name of the vault, without domain
* vault_full_path: domain:vault
* path: the object path within the vault
* parent_path: the parent path to the object
* filename: the object's filename (if any)
* full_path: the validated full path
The following components may be overridden using kwargs:
* vault
* path
Object paths (also known as "paths") must begin with a forward slash.
The following path formats are supported:
domain:vault:/path -> object "path" in the root of "domain:vault"
domain:vault/path -> object "path" in the root of "domain:vault"
vault:/path -> object "path" in the root of "vault"
vault/path -> object "path" in the root of "vault"
~/path -> object "path" in the root of personal vault
vault/ -> root of "vault"
~/ -> root of your personal vault
The following formats are not supported:
path -> invalid/ambiguous path (exception)
vault:path -> invalid/ambiguous path (exception)
vault:path/path -> unsupported, interpreted as domain:vault/path
If no account_domain, assumes user's account domain
If no vault, uses personal vault.
If no path, uses /
"""
from solvebio.resource.vault import Vault

_client = kwargs.pop('client', None) or cls._client or client

# Remove double slashes and leading ':'
path = re.sub('//+', '/', path.lstrip(':'))

parts = path.split(':', 2)
if len(parts) == 3:
account_domain, vault_name, object_path = parts
elif len(parts) == 2:
# if no slash assume user means root
if '/' not in parts[1]:
account_domain, vault_name = parts
object_path = '/'
else:
# if second part begins with slash, assume missing domain
if parts[1][0] == '/':
account_domain = \
_client.get('/v1/user', {})['account']['domain']
vault_name, object_path = parts
else:
raise Exception('Full path must be of the format: '
'"vault_name:/object_path" or '
'"account_domain:vault_name:/object_path"')
# TODO maybe no good
# assumes missing ":" between vault and path
# mydomain:myvault/here/is/path
# myvault:here/is/path
# account_domain = parts[0]
# vault_name, object_path = parts[1].split('/', 1)
if not full_path:
raise Exception(
'Invalid path: ',
'Full path must be in one of the following formats: '
'"vault:/path", "domain:vault:/path", or "~/path"')

# Parse the vault's full_path, using overrides if any
input_vault = kwargs.get('vault') or full_path
try:
vault_full_path, path_dict = \
Vault.validate_full_path(input_vault, client=_client)
except:
raise Exception(
'Could not determine vault from "{0}". '
'Full path must be in one of the following formats: '
'"vault:/path", "domain:vault:/path", or "~/path"'
.format(input_vault))

if kwargs.get('path'):
# Allow override of the object_path.
full_path = '{0}:/{1}'.format(vault_full_path, kwargs['path'])

match = cls.PATH_RE.match(full_path)
if match:
object_path = match.groupdict()['path']
else:
# if slash assume user means private vault
if '/' in parts[0]:
vault = Vault.get_personal_vault(client=_client)
account_domain, vault_name = vault.full_path.split(':')
object_path = parts[0]
else:
raise Exception('Full path must be of the format: '
'"vault_name:/object_path" or '
'"account_domain:vault_name:/object_path"')

if object_path[0] != '/':
object_path = '/' + object_path

# Strip trailing slash
raise Exception(
'Cannot find a valid object path in "{0}". '
'Full path must be in one of the following formats: '
'"vault:/path", "domain:vault:/path", or "~/path"'
.format(full_path))

# Remove double slashes
object_path = re.sub('//+', '/', object_path)
if object_path != '/':
# Remove trailing slash
object_path = object_path.rstrip('/')

full_path = ':'.join([account_domain, vault_name, object_path])
return full_path, dict(domain=account_domain,
vault=vault_name,
path=object_path)
path_dict['path'] = object_path
# TODO: parent_path and filename
full_path = '{domain}:{vault}:{path}'.format(**path_dict)
path_dict['full_path'] = full_path
return full_path, path_dict

@classmethod
def get_by_full_path(cls, full_path, **params):
Expand Down
Loading

0 comments on commit c3c4383

Please sign in to comment.