From d7a587c1336a0e25102245add8a596f20fe88f4d Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 24 Apr 2020 11:06:27 -0400 Subject: [PATCH 1/7] get or create for an Object (also closes #326) --- solvebio/resource/dataset.py | 83 +----------------------- solvebio/resource/object.py | 120 ++++++++++++++++++++++++++++++++--- solvebio/resource/vault.py | 11 ++-- 3 files changed, 120 insertions(+), 94 deletions(-) diff --git a/solvebio/resource/dataset.py b/solvebio/resource/dataset.py index b3636cbf..e2af79e7 100644 --- a/solvebio/resource/dataset.py +++ b/solvebio/resource/dataset.py @@ -3,7 +3,6 @@ from ..client import client from ..query import Query -from ..errors import NotFoundError from .solveobject import convert_to_solve_object from .apiresource import CreateableAPIResource @@ -67,86 +66,10 @@ def get_by_full_path(cls, full_path, **kwargs): @classmethod def get_or_create_by_full_path(cls, full_path, **kwargs): - from solvebio import Vault from solvebio import Object - - _client = kwargs.pop('client', None) or cls._client or client - create_vault = kwargs.pop('create_vault', False) - create_folders = kwargs.pop('create_folders', True) - - try: - return Dataset.get_by_full_path(full_path, assert_type='dataset', - client=_client) - except NotFoundError: - pass - - # Dataset not found, create it step-by-step - full_path, parts = Object.validate_full_path(full_path, client=_client) - - if create_vault: - vault = Vault.get_or_create_by_full_path( - '{0}:{1}'.format(parts['domain'], parts['vault']), - client=_client) - else: - vaults = Vault.all(account_domain=parts['domain'], - name=parts['vault'], - client=_client) - if len(vaults.solve_objects()) == 0: - raise Exception( - 'Vault does not exist with name {0}:{1}'.format( - parts['domain'], parts['vault']) - ) - vault = vaults.solve_objects()[0] - - # Create the folders to hold the dataset if they do not already exist. - object_path = parts['path'] - curr_path = os.path.dirname(object_path) - folders_to_create = [] - new_folders = [] - id_map = {'/': None} - - while curr_path != '/': - try: - obj = Object.get_by_path(curr_path, - vault_id=vault.id, - assert_type='folder', - client=_client) - id_map[curr_path] = obj.id - break - except NotFoundError: - if not create_folders: - raise Exception('Folder {} does not exist. 
Pass ' - 'create_folders=True to auto-create ' - 'missing folders') - - folders_to_create.append(curr_path) - curr_path = '/'.join(curr_path.split('/')[:-1]) - if curr_path == '': - break - - for folder in reversed(folders_to_create): - new_folder = Object.create( - object_type='folder', - vault_id=vault.id, - filename=os.path.basename(folder), - parent_object_id=id_map[os.path.dirname(folder)], - client=_client - ) - new_folders.append(new_folder) - id_map[folder] = new_folder.id - - if os.path.dirname(object_path) == '/': - parent_folder_id = None - elif new_folders: - parent_folder_id = new_folders[-1].id - else: - parent_folder_id = id_map[os.path.dirname(object_path)] - - return Dataset.create(name=os.path.basename(object_path), - vault_id=vault.id, - vault_parent_object_id=parent_folder_id, - client=_client, - **kwargs) + # Assert this is a dataset + kwargs['assert_type'] = 'dataset' + return Object.get_or_create_by_full_path(full_path, **kwargs) def saved_queries(self, **params): from solvebio import SavedQuery diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index 2030fde8..b565233f 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -145,6 +145,18 @@ def validate_full_path(cls, full_path, **kwargs): return path_dict['full_path'], path_dict + @classmethod + def get_by_path(cls, path, **params): + assert_type = params.pop('assert_type', None) + params.update({'path': path}) + obj = cls._retrieve_helper('object', 'path', path, **params) + if obj and assert_type and obj['object_type'] != assert_type: + raise SolveError( + "Expected a {} but found a {} at {}" + .format(assert_type, obj['object_type'], path)) + + return obj + @classmethod def get_by_full_path(cls, full_path, **params): # Don't pop client from params since **params is used below @@ -162,16 +174,106 @@ def get_by_full_path(cls, full_path, **params): return obj @classmethod - def get_by_path(cls, path, **params): - assert_type = params.pop('assert_type', None) - params.update({'path': path}) - obj = cls._retrieve_helper('object', 'path', path, **params) - if obj and assert_type and obj['object_type'] != assert_type: - raise SolveError( - "Expected a {} but found a {} at {}" - .format(assert_type, obj['object_type'], path)) + def get_or_create_by_full_path(cls, full_path, **kwargs): + from solvebio import Vault + from solvebio import Object - return obj + _client = kwargs.pop('client', None) or cls._client or client + create_vault = kwargs.pop('create_vault', False) + create_folders = kwargs.pop('create_folders', True) + + # Check for object type assertion, if not explicitly added, see + # if user has passed object_type, as their intent was to get/create + # an object of that type. + assert_type = kwargs.pop('assert_type', + kwargs.get('object_type', None)) + + try: + return cls.get_by_full_path( + full_path, assert_type=assert_type, client=_client) + except NotFoundError: + pass + + # Object type required when creating Object + object_type = kwargs.get('object_type') + if not object_type: + raise Exception("'object_type' is required when creating a new " + "Object. Pass one of: file, folder, dataset") + + # TODO should we require file contents? 
+ # Technically a user could then use this object to the call + # upload_file() + # if object_type == 'file' and not kwargs.get('content'): + # raise Exception('') + + # Object not found, create it step-by-step + full_path, parts = Object.validate_full_path(full_path, client=_client) + + if create_vault: + vault = Vault.get_or_create_by_full_path( + '{0}:{1}'.format(parts['domain'], parts['vault']), + client=_client) + else: + vaults = Vault.all(account_domain=parts['domain'], + name=parts['vault'], + client=_client) + if len(vaults.solve_objects()) == 0: + raise Exception( + 'Vault with name {0}:{1} does not exist. Pass ' + 'create_vault=True to auto-create'.format( + parts['domain'], parts['vault']) + ) + vault = vaults.solve_objects()[0] + + # Create the folders to hold the object if they do not already exist. + object_path = parts['path'] + curr_path = os.path.dirname(object_path) + folders_to_create = [] + new_folders = [] + id_map = {'/': None} + + while curr_path != '/': + try: + obj = Object.get_by_path(curr_path, + vault_id=vault.id, + assert_type='folder', + client=_client) + id_map[curr_path] = obj.id + break + except NotFoundError: + if not create_folders: + raise Exception('Folder {} does not exist. Pass ' + 'create_folders=True to auto-create ' + 'missing folders') + + folders_to_create.append(curr_path) + curr_path = '/'.join(curr_path.split('/')[:-1]) + if curr_path == '': + break + + for folder in reversed(folders_to_create): + new_folder = Object.create( + object_type='folder', + vault_id=vault.id, + filename=os.path.basename(folder), + parent_object_id=id_map[os.path.dirname(folder)], + client=_client + ) + new_folders.append(new_folder) + id_map[folder] = new_folder.id + + if os.path.dirname(object_path) == '/': + parent_folder_id = None + elif new_folders: + parent_folder_id = new_folders[-1].id + else: + parent_folder_id = id_map[os.path.dirname(object_path)] + + return Object.create(filename=os.path.basename(object_path), + vault_id=vault.id, + parent_object_id=parent_folder_id, + client=_client, + **kwargs) @classmethod def upload_file(cls, local_path, remote_path, vault_full_path, **kwargs): diff --git a/solvebio/resource/vault.py b/solvebio/resource/vault.py index ae0b7613..a834efb2 100644 --- a/solvebio/resource/vault.py +++ b/solvebio/resource/vault.py @@ -138,19 +138,20 @@ def _get_parent_folder(self, path): ) def create_dataset(self, name, **params): - from solvebio import Dataset + from solvebio import Object params['vault_id'] = self.id + params['object_type'] = 'dataset' path = params.pop('path', None) if path == '/' or path is None: - params['vault_parent_object_id'] = None + params['parent_object_id'] = None else: parent_object = self._get_parent_folder(path) - params['vault_parent_object_id'] = parent_object.id + params['parent_object_id'] = parent_object.id - params['name'] = name - return Dataset.create(**params) + params['filename'] = name + return Object.create(**params) def create_folder(self, filename, **params): from solvebio import Object From a42c8aa8387ea9e0e3387be7381692d3607c326d Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 24 Apr 2020 12:58:13 -0400 Subject: [PATCH 2/7] add Object getattr for #287 --- solvebio/cli/data.py | 23 +++++++++++++---------- solvebio/resource/dataset.py | 5 +++++ solvebio/resource/object.py | 30 ++++++++++++++++++++---------- solvebio/test/client_mocks.py | 10 +++++++++- solvebio/test/test_shortcuts.py | 33 ++++++++++++++++----------------- 5 files changed, 63 insertions(+), 38 deletions(-) diff --git 
a/solvebio/cli/data.py b/solvebio/cli/data.py index 9694acc4..88e1d3dd 100644 --- a/solvebio/cli/data.py +++ b/solvebio/cli/data.py @@ -14,6 +14,9 @@ from solvebio import Vault from solvebio import Object +from solvebio import Dataset +from solvebio import DatasetImport +from solvebio import DatasetTemplate from solvebio.utils.files import check_gzip_path from solvebio.errors import SolveError from solvebio.errors import NotFoundError @@ -157,11 +160,11 @@ def _create_template_from_file(template_file, dry_run=False): sys.exit(1) if dry_run: - template = solvebio.DatasetTemplate(**template_json) + template = DatasetTemplate(**template_json) print("A new dataset template will be created from: {0}" .format(template_file)) else: - template = solvebio.DatasetTemplate.create(**template_json) + template = DatasetTemplate.create(**template_json) print("A new dataset template was created with id: {0}" .format(template.id)) @@ -189,7 +192,7 @@ def create_dataset(args, template=None): try: # Fail if a dataset already exists. - solvebio.Dataset.get_by_full_path(full_path) + Object.get_by_full_path(full_path, assert_type='dataset') print('A dataset already exists at path: {0}'.format(full_path)) sys.exit(1) except NotFoundError: @@ -202,8 +205,8 @@ def create_dataset(args, template=None): pass elif args.template_id: try: - template = solvebio.DatasetTemplate.retrieve(args.template_id) - except solvebio.SolveError as e: + template = DatasetTemplate.retrieve(args.template_id) + except SolveError as e: if e.status_code != 404: raise e print("No template with ID {0} found!".format(args.template_id)) @@ -256,7 +259,7 @@ def create_dataset(args, template=None): print("Metadata: {}".format(metadata)) return - return solvebio.Dataset.get_or_create_by_full_path( + return Dataset.get_or_create_by_full_path( full_path, capacity=args.capacity, fields=fields, @@ -394,8 +397,8 @@ def import_file(args): if args.template_id: try: - template = solvebio.DatasetTemplate.retrieve(args.template_id) - except solvebio.SolveError as e: + template = DatasetTemplate.retrieve(args.template_id) + except SolveError as e: if e.status_code != 404: raise e print("No template with ID {0} found!".format(args.template_id)) @@ -410,7 +413,7 @@ def import_file(args): dataset = create_dataset(args, template=template) else: try: - dataset = solvebio.Dataset.get_by_full_path(full_path) + dataset = Object.get_by_full_path(full_path, assert_type='dataset') except solvebio.errors.NotFoundError: print("Dataset not found: {0}".format(full_path)) print("Tip: use the --create-dataset flag " @@ -442,7 +445,7 @@ def import_file(args): kwargs.update(template.import_params) # Create the import - import_ = solvebio.DatasetImport.create( + import_ = DatasetImport.create( dataset_id=dataset.id, commit_mode=args.commit_mode, **kwargs diff --git a/solvebio/resource/dataset.py b/solvebio/resource/dataset.py index e2af79e7..0d43e01b 100644 --- a/solvebio/resource/dataset.py +++ b/solvebio/resource/dataset.py @@ -38,6 +38,10 @@ class Dataset(CreateableAPIResource, def make_full_path(cls, vault_name, path, name, **kwargs): from solvebio import SolveError + print('[Deprecated] Warning this method has been deprecated' + 'and will be removed in a future released.' 
+ 'Use self.vault_object.full_path') + _client = kwargs.pop('client', None) or cls._client or client try: @@ -69,6 +73,7 @@ def get_or_create_by_full_path(cls, full_path, **kwargs): from solvebio import Object # Assert this is a dataset kwargs['assert_type'] = 'dataset' + kwargs['object_type'] = 'dataset' return Object.get_or_create_by_full_path(full_path, **kwargs) def saved_queries(self, **params): diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index b565233f..749121b3 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -418,17 +418,27 @@ def objects(self, **params): def ls(self, **params): return self.objects(**params) - def query(self, query=None, **params): - """Shortcut to query the underlying Dataset object""" - from solvebio import Dataset + def __getattr__(self, name): + """Shortcut to access attributes of the underlying Dataset resource""" + from solvebio.resource import Dataset + + valid_attrs = [ + # query data + 'query', 'lookup', + # transform data + 'import_file', 'export', 'migrate', + # dataset meta + 'fields', 'template', 'imports', 'commits', + # helpers + 'activity', 'saved_queries' + ] + if name in valid_attrs and self.is_dataset: + return getattr(Dataset(self.dataset_id, client=self._client), name) - if not self.is_dataset: - raise SolveError( - "The query method can only be used by a dataset. Found a {}" - .format(self.object_type)) - - return Dataset(self.dataset_id, client=self._client).query( - query=query, **params) + try: + return self[name] + except KeyError as err: + raise AttributeError(*err.args) @property def parent(self): diff --git a/solvebio/test/client_mocks.py b/solvebio/test/client_mocks.py index 6476c869..f47fdf3c 100644 --- a/solvebio/test/client_mocks.py +++ b/solvebio/test/client_mocks.py @@ -41,7 +41,7 @@ def solve_objects(self): return ExtendedList([self.create()]) def update_paths(self): - """Sets vault_name, path and full_path""" + """Sets vault_name, path and full_path based on mock inputs""" if not self.object['vault_name']: self.object['vault_name'] = 'mock_vault' @@ -55,6 +55,11 @@ def update_paths(self): self.object['full_path'] = 'solvebio:{0}:{1}'.format( self.object['vault_name'], self.object['path']) + def update_dataset(self): + """Sets dataset_id if object is a dataset""" + if self.object['object_type'] == 'dataset': + self.object['dataset_id'] = self.object['id'] + class FakeMigrationResponse(Fake201Response): @@ -122,6 +127,7 @@ def __init__(self, data): } self.object.update(data) self.update_paths() + self.update_dataset() class FakeDatasetResponse(Fake201Response): @@ -220,6 +226,8 @@ def fake_object_retrieve(*args, **kwargs): def fake_dataset_create(*args, **kwargs): + # Dataset.create() is deprecated + print("WARNING: Your code likely wants to be using FakeObjectCreate.") return FakeDatasetResponse(kwargs).create() diff --git a/solvebio/test/test_shortcuts.py b/solvebio/test/test_shortcuts.py index 38ac5b61..b83c01b1 100644 --- a/solvebio/test/test_shortcuts.py +++ b/solvebio/test/test_shortcuts.py @@ -20,7 +20,6 @@ from solvebio.test.client_mocks import fake_object_all from solvebio.test.client_mocks import fake_object_create from solvebio.test.client_mocks import fake_object_retrieve -from solvebio.test.client_mocks import fake_dataset_create from solvebio.test.client_mocks import fake_dataset_tmpl_create from solvebio.test.client_mocks import fake_dataset_tmpl_retrieve from solvebio.test.client_mocks import fake_dataset_import_create @@ -43,23 +42,23 @@ def setUp(self): 
@mock.patch('solvebio.resource.Vault.all') @mock.patch('solvebio.resource.Object.all') - @mock.patch('solvebio.resource.Dataset.create') + @mock.patch('solvebio.resource.Object.create') def test_create_dataset(self, DatasetCreate, ObjectAll, VaultAll): - DatasetCreate.side_effect = fake_dataset_create + DatasetCreate.side_effect = fake_object_create ObjectAll.side_effect = fake_object_all VaultAll.side_effect = fake_vault_all args = ['create-dataset', 'solvebio:test_vault:/test-dataset', '--capacity', 'small'] ds = main.main(args) - self.assertEqual(ds.name, 'test-dataset') + self.assertEqual(ds.filename, 'test-dataset') self.assertEqual(ds.path, '/test-dataset') @mock.patch('solvebio.resource.Vault.all') @mock.patch('solvebio.resource.Object.all') - @mock.patch('solvebio.resource.Dataset.create') + @mock.patch('solvebio.resource.Object.create') def test_create_dataset_by_filename(self, DatasetCreate, ObjectAll, VaultAll): - DatasetCreate.side_effect = fake_dataset_create + DatasetCreate.side_effect = fake_object_create ObjectAll.side_effect = fake_object_all VaultAll.side_effect = fake_vault_all args = [ @@ -71,7 +70,7 @@ def test_create_dataset_by_filename(self, DatasetCreate, ObjectAll, '--metadata', 'TEST2=tag2', ] ds = main.main(args) - self.assertEqual(ds.name, 'test-dataset-filename') + self.assertEqual(ds.filename, 'test-dataset-filename') self.assertEqual(ds.path, '/test-dataset-filename') self.assertEqual(ds.capacity, 'small') self.assertEqual(ds.tags, ['tag_test']) @@ -90,13 +89,13 @@ def _validate_tmpl_fields(self, fields): @mock.patch('solvebio.resource.Vault.all') @mock.patch('solvebio.resource.Object.all') - @mock.patch('solvebio.resource.Dataset.create') + @mock.patch('solvebio.resource.Object.create') @mock.patch('solvebio.resource.DatasetTemplate.create') def test_create_dataset_upload_template(self, TmplCreate, DatasetCreate, ObjectAll, VaultAll): TmplCreate.side_effect = fake_dataset_tmpl_create - DatasetCreate.side_effect = fake_dataset_create + DatasetCreate.side_effect = fake_object_create ObjectAll.side_effect = fake_object_all VaultAll.side_effect = fake_vault_all @@ -112,14 +111,14 @@ def test_create_dataset_upload_template(self, TmplCreate, @mock.patch('solvebio.resource.Vault.all') @mock.patch('solvebio.resource.Object.all') - @mock.patch('solvebio.resource.Dataset.create') + @mock.patch('solvebio.resource.Object.create') @mock.patch('solvebio.resource.DatasetTemplate.retrieve') @mock.patch('solvebio.resource.DatasetTemplate.create') def test_create_dataset_template_id(self, TmplCreate, TmplRetrieve, DatasetCreate, ObjectAll, VaultAll): VaultAll.side_effect = fake_vault_all ObjectAll.side_effect = fake_object_all - DatasetCreate.side_effect = fake_dataset_create + DatasetCreate.side_effect = fake_object_create TmplRetrieve.side_effect = fake_dataset_tmpl_retrieve TmplCreate.side_effect = fake_dataset_tmpl_create @@ -145,15 +144,15 @@ def test_create_dataset_template_id(self, TmplCreate, TmplRetrieve, @mock.patch('solvebio.resource.Vault.all') @mock.patch('solvebio.resource.Object.all') @mock.patch('solvebio.resource.Object.upload_file') - @mock.patch('solvebio.resource.Dataset.create') + @mock.patch('solvebio.resource.Object.create') @mock.patch('solvebio.resource.DatasetImport.create') - def _test_import_file(self, args, DatasetImportCreate, DatasetCreate, - ObjectCreate, ObjectAll, VaultAll, VaultLookup, + def _test_import_file(self, args, DatasetImportCreate, ObjectCreate, + UploadFile, ObjectAll, VaultAll, VaultLookup, UploadPath, TmplCreate, TmplRetrieve): 
DatasetImportCreate.side_effect = fake_dataset_import_create - DatasetCreate.side_effect = fake_dataset_create - ObjectAll.side_effect = fake_object_all ObjectCreate.side_effect = fake_object_create + UploadFile.side_effect = fake_object_create + ObjectAll.side_effect = fake_object_all VaultAll.side_effect = fake_vault_all UploadPath.side_effect = upload_path VaultLookup.side_effect = fake_vault_create @@ -199,7 +198,7 @@ def test_import_tilde(self): ]: args = ['import', '--create-dataset', dataset_path, file_] imports, ds = self._test_import_file(args) - self.assertEqual(ds.name, 'test-dataset') + self.assertEqual(ds.filename, 'test-dataset') # should be a manifest with a single file self.assertEqual(len(imports[0].manifest['files']), 1) From 5d873718e8e1d7636d794e6438d3864c46d7cbb6 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 24 Apr 2020 13:42:29 -0400 Subject: [PATCH 3/7] handle other cases --- solvebio/resource/dataset.py | 20 ++++++++------------ solvebio/resource/object.py | 22 ++++++++++++++++++---- solvebio/test/test_object.py | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 16 deletions(-) diff --git a/solvebio/resource/dataset.py b/solvebio/resource/dataset.py index 0d43e01b..e5476659 100644 --- a/solvebio/resource/dataset.py +++ b/solvebio/resource/dataset.py @@ -94,9 +94,8 @@ def saved_queries(self, **params): def fields(self, name=None, **params): if 'fields_url' not in self: - raise Exception( - 'Please use Dataset.retrieve({ID}) before looking ' - 'up fields') + # Dataset object may not have been retrieved. Grab it. + self.refresh() if name: params.update({ @@ -118,18 +117,16 @@ def fields(self, name=None, **params): def template(self, **params): if 'template_url' not in self: - raise Exception( - 'Please use Dataset.retrieve({ID}) before retrieving ' - 'a template') + # Dataset object may not have been retrieved. Grab it. + self.refresh() response = self._client.get(self.template_url, params) return convert_to_solve_object(response, client=self._client) def commits(self, **params): if 'commits_url' not in self: - raise Exception( - 'Please use Dataset.retrieve({ID}) before looking ' - 'up commits') + # Dataset object may not have been retrieved. Grab it. + self.refresh() response = self._client.get(self.commits_url, params) results = convert_to_solve_object(response, client=self._client) @@ -142,9 +139,8 @@ def commits(self, **params): def imports(self, **params): if 'imports_url' not in self: - raise Exception( - 'Please use Dataset.retrieve({ID}) before looking ' - 'up imports') + # Dataset object may not have been retrieved. Grab it. 
+ self.refresh() response = self._client.get(self.imports_url, params) results = convert_to_solve_object(response, client=self._client) diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index 1e9c65fe..3b290d19 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -422,9 +422,10 @@ def __getattr__(self, name): """Shortcut to access attributes of the underlying Dataset resource""" from solvebio.resource import Dataset - valid_attrs = [ + # Valid override attributes that let Object act like a Dataset + valid_dataset_attrs = [ # query data - 'query', 'lookup', + 'query', 'lookup', 'beacon', # transform data 'import_file', 'export', 'migrate', # dataset meta @@ -432,14 +433,27 @@ def __getattr__(self, name): # helpers 'activity', 'saved_queries' ] - if name in valid_attrs and self.is_dataset: - return getattr(Dataset(self.dataset_id, client=self._client), name) try: return self[name] except KeyError as err: + if name in valid_dataset_attrs and self.is_dataset: + return getattr( + Dataset(self.dataset_id, client=self._client), name) + raise AttributeError(*err.args) + @property + def dataset(self): + """ Returns the dataset object """ + from solvebio import Dataset + if not self.is_dataset: + raise SolveError( + "Only dataset objects have a Dataset resource. This is a {}" + .format(self.object_type)) + + return Dataset.retrieve(self['dataset_id'], client=self._client) + @property def parent(self): """ Returns the parent object """ diff --git a/solvebio/test/test_object.py b/solvebio/test/test_object.py index da0fbcb1..8deed432 100644 --- a/solvebio/test/test_object.py +++ b/solvebio/test/test_object.py @@ -3,6 +3,7 @@ from .helper import SolveBioTestCase from solvebio.test.client_mocks import fake_object_create +from solvebio.test.client_mocks import fake_dataset_create class ObjectTests(SolveBioTestCase): @@ -131,3 +132,37 @@ def test_object_has_tag(self, ObjectMock): obj = self.client.Object.create(name='blah_untagged') self.assertEqual(obj.tags, []) self.assertFalse(obj.has_tag("foo")) + + @mock.patch('solvebio.resource.Dataset.create') + @mock.patch('solvebio.resource.Object.create') + def test_object_dataset_getattr(self, ObjectCreate, DatasetCreate): + ObjectCreate.side_effect = fake_object_create + DatasetCreate.side_effect = fake_dataset_create + + valid_attrs = [ + 'query', 'lookup', 'beacon', + 'import_file', 'export', 'migrate', + 'fields', 'template', 'imports', 'commits', + 'activity', 'saved_queries' + ] + + ds = self.client.Dataset.create(name='foo') + ds_obj = self.client.Object.create(name='foo_dataset', + object_type='dataset') + file_ = self.client.Object.create(name='foo_file', + object_type='file') + folder_ = self.client.Object.create(name='foo_folder', + object_type='folder') + for attr in valid_attrs: + self.assertTrue(getattr(ds, attr)) + self.assertTrue(getattr(ds_obj, attr)) + with self.assertRaises(AttributeError): + getattr(file_, attr) + with self.assertRaises(AttributeError): + getattr(folder_, attr) + + # Test that any old attr doesnt work + fake_attr = 'foobar' + for obj in [file_, folder_, ds, ds_obj]: + with self.assertRaises(AttributeError): + getattr(obj, fake_attr) From 63554cc0b87396e5a99986cb72c58880c0884718 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 24 Apr 2020 14:39:36 -0400 Subject: [PATCH 4/7] update --- solvebio/resource/dataset.py | 4 ---- solvebio/resource/object.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/solvebio/resource/dataset.py b/solvebio/resource/dataset.py 
index e5476659..27ec7c1f 100644 --- a/solvebio/resource/dataset.py +++ b/solvebio/resource/dataset.py @@ -38,10 +38,6 @@ class Dataset(CreateableAPIResource, def make_full_path(cls, vault_name, path, name, **kwargs): from solvebio import SolveError - print('[Deprecated] Warning this method has been deprecated' - 'and will be removed in a future released.' - 'Use self.vault_object.full_path') - _client = kwargs.pop('client', None) or cls._client or client try: diff --git a/solvebio/resource/object.py b/solvebio/resource/object.py index 3b290d19..2a6e770d 100644 --- a/solvebio/resource/object.py +++ b/solvebio/resource/object.py @@ -452,7 +452,7 @@ def dataset(self): "Only dataset objects have a Dataset resource. This is a {}" .format(self.object_type)) - return Dataset.retrieve(self['dataset_id'], client=self._client) + return Dataset.retrieve(self.dataset_id, client=self._client) @property def parent(self): From 1b9ff5e17ba513cdfbd18cf8fd3fb588ed3717e1 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 24 Apr 2020 14:46:31 -0400 Subject: [PATCH 5/7] update examples --- examples/import_data.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/import_data.py b/examples/import_data.py index a1a983d5..0d848895 100644 --- a/examples/import_data.py +++ b/examples/import_data.py @@ -27,7 +27,7 @@ # Prints updates as the data is processed # and indexed into SolveBio -imp.follow() +dataset.activity(follow=True) # # You now have data! @@ -49,5 +49,4 @@ dataset_id=dataset.id, data_records=new_records ) - -imp.follow() +dataset.activity(follow=True) From c15262e94a95a26d958b2ecd9fae3794209bd410 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 24 Apr 2020 14:46:44 -0400 Subject: [PATCH 6/7] add a note --- solvebio/errors.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/solvebio/errors.py b/solvebio/errors.py index 5f9f8fc9..f61493ee 100644 --- a/solvebio/errors.py +++ b/solvebio/errors.py @@ -59,5 +59,23 @@ def __init__(self, message=None, response=None): if self.json_body: self.message += ' %s' % self.json_body + # TODO + # NOTE there are other keys that exist in some Errors that + # are not detail or non_field_errors. For instance 'manifest' + # is a key if uploading using a manifest with invalid file format. + # It includes a very useful error message that gets lost. + # Is there harm in just handling any error key here? 
+ # (handle keys with list values and those without) + # Implementation below + # + # for k, v in self.json_body: + # if isinstance(v, list): + # self.message += ' %s Errors: %s' % \ + # (k, ', '.join(self.json_body[k])) + # else: + # self.message += ' %s Errors: %s' % \ + # (k, self.json_body[k]) + # del self.json_body[k] + def __str__(self): return self.message From fa89f682c69a2c5566029802f3e6b29f29ecf941 Mon Sep 17 00:00:00 2001 From: Jeff Hull Date: Fri, 24 Apr 2020 15:15:02 -0400 Subject: [PATCH 7/7] use Object lookup --- examples/import_data.py | 2 +- solvebio/test/test_beacon.py | 4 ++-- solvebio/test/test_dataset.py | 21 ++++----------------- solvebio/test/test_exports.py | 2 +- solvebio/test/test_lookup.py | 2 +- solvebio/test/test_query.py | 2 +- solvebio/test/test_query_batch.py | 2 +- 7 files changed, 11 insertions(+), 24 deletions(-) diff --git a/examples/import_data.py b/examples/import_data.py index 0d848895..74f0ae88 100644 --- a/examples/import_data.py +++ b/examples/import_data.py @@ -11,7 +11,7 @@ dataset_name = 'SampleDataset' # Create a dataset -dataset = solvebio.Dataset.get_or_create_by_full_path( +dataset = solvebio.Object.get_or_create_by_full_path( '{0}:/{1}/{2}'.format(vault.name, path, dataset_name), ) diff --git a/solvebio/test/test_beacon.py b/solvebio/test/test_beacon.py index 184894a8..fe2e37cd 100644 --- a/solvebio/test/test_beacon.py +++ b/solvebio/test/test_beacon.py @@ -12,7 +12,7 @@ def test_beacon_request(self): Check that current Clinvar/Variants returns correct fields for beacon """ - dataset = self.client.Dataset.get_by_full_path( + dataset = self.client.Object.get_by_full_path( self.TEST_DATASET_FULL_PATH) beacon = dataset.beacon(chromosome='6', coordinate=51612854, # staging @@ -26,7 +26,7 @@ def test_beacon_request(self): # Check that Clinvar/Variants version 3.7.0-2015-12-06 # returns true for specific case - dataset = self.client.Dataset.get_by_full_path( + dataset = self.client.Object.get_by_full_path( self.TEST_DATASET_FULL_PATH) beacontwo = dataset.beacon(chromosome='13', coordinate=113803460, diff --git a/solvebio/test/test_dataset.py b/solvebio/test/test_dataset.py index e4d13542..ab16a288 100644 --- a/solvebio/test/test_dataset.py +++ b/solvebio/test/test_dataset.py @@ -14,7 +14,8 @@ def test_dataset_retrieval(self): self.assertTrue('id' in dataset, 'Should be able to get id in dataset') - check_fields = ['class_name', 'created_at', + check_fields = ['class_name', + 'created_at', 'data_url', 'vault_id', 'vault_object_id', @@ -49,7 +50,7 @@ def test_dataset_tree_traversal_shortcuts(self): ) def test_dataset_fields(self): - dataset = self.client.Dataset.get_by_full_path( + dataset = self.client.Object.get_by_full_path( self.TEST_DATASET_FULL_PATH) fields = dataset.fields() dataset_field = fields.data[0] @@ -67,22 +68,8 @@ def test_dataset_fields(self): self.assertSetEqual(set(dataset_field.keys()), check_fields) def test_dataset_facets(self): - dataset = self.client.Dataset.get_by_full_path( + dataset = self.client.Object.get_by_full_path( self.TEST_DATASET_FULL_PATH) field = dataset.fields('status') facets = field.facets() self.assertTrue(len(facets['facets']) >= 0) - - """ - # TODO support a Genomic test dataset (grab clinvar one from API build) - def test_dataset_beacon(self): - obj = Object.get_by_full_path(self.TEST_DATASET_FULL_PATH) - resp = Dataset.retrieve(obj['dataset_id']).beacon(chromosome="6", - coordinate=123, - allele='G') - self.assertTrue('total' in resp and resp['total'] == 0) - self.assertTrue('exist' in resp and 
resp['exist'] == False) - self.assertTrue('query' in resp and resp['query']['chromosome'] == '6') - self.assertTrue('query' in resp and resp['query']['allele'] == 'G') - self.assertTrue('query' in resp and resp['query']['coordinate'] == 123) - """ diff --git a/solvebio/test/test_exports.py b/solvebio/test/test_exports.py index a05bdd34..98b862de 100644 --- a/solvebio/test/test_exports.py +++ b/solvebio/test/test_exports.py @@ -14,7 +14,7 @@ class ExportsTests(SolveBioTestCase): def setUp(self): super(ExportsTests, self).setUp() filters = Filter(rgd_id='RGD:2645') - self.dataset = self.client.Dataset.get_by_full_path( + self.dataset = self.client.Object.get_by_full_path( 'solvebio:public:/HGNC/3.0.0-2016-11-10/HGNC') self.query = self.dataset.query(filters=filters, fields=['rgd_id'], genome_build='GRCh37', limit=10) diff --git a/solvebio/test/test_lookup.py b/solvebio/test/test_lookup.py index fa409d40..a7e43941 100644 --- a/solvebio/test/test_lookup.py +++ b/solvebio/test/test_lookup.py @@ -7,7 +7,7 @@ class LookupTests(SolveBioTestCase): def setUp(self): super(LookupTests, self).setUp() - self.dataset = self.client.Dataset.get_by_full_path( + self.dataset = self.client.Object.get_by_full_path( self.TEST_DATASET_FULL_PATH) def test_lookup_error(self): diff --git a/solvebio/test/test_query.py b/solvebio/test/test_query.py index 96d368b6..7c1ca86c 100644 --- a/solvebio/test/test_query.py +++ b/solvebio/test/test_query.py @@ -11,7 +11,7 @@ class BaseQueryTest(SolveBioTestCase): """Test Paging Queries""" def setUp(self): super(BaseQueryTest, self).setUp() - self.dataset = self.client.Dataset.get_by_full_path( + self.dataset = self.client.Object.get_by_full_path( self.TEST_DATASET_FULL_PATH) def test_basic(self): diff --git a/solvebio/test/test_query_batch.py b/solvebio/test/test_query_batch.py index 95ea5e1a..fee86727 100644 --- a/solvebio/test/test_query_batch.py +++ b/solvebio/test/test_query_batch.py @@ -6,7 +6,7 @@ class BatchQueryTest(SolveBioTestCase): def setUp(self): super(BatchQueryTest, self).setUp() - self.dataset = self.client.Dataset.get_by_full_path( + self.dataset = self.client.Object.get_by_full_path( self.TEST_DATASET_FULL_PATH) def test_invalid_batch_query(self):
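
Usage notes (illustrative only, not part of the patches). A minimal sketch of
what this series enables; the vault name and paths below are hypothetical, and
solvebio.login() / credential setup is assumed:

    import solvebio

    solvebio.login()

    # Patch 1/7: get or create any object by full path. Missing parent
    # folders are created by default (create_folders=True); pass
    # create_vault=True to also auto-create the vault. object_type is
    # required whenever the object has to be created.
    dataset = solvebio.Object.get_or_create_by_full_path(
        'solvebio:my_vault:/analyses/2020/my-dataset',
        object_type='dataset')

    # Patches 2/7 and 3/7: a dataset Object proxies Dataset attributes
    # (query, lookup, fields, import_file, activity, ...), so the extra
    # Dataset.retrieve() round trip is no longer needed.
    for record in dataset.query(limit=10):
        print(record)
    print(dataset.fields())

    # The underlying Dataset resource is still reachable when required:
    ds = dataset.dataset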
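
The TODO left in solvebio/errors.py (patch 6/7) could take roughly the shape
below. This is a hedged sketch only: append_field_errors is a hypothetical
helper name, it assumes 'detail' and 'non_field_errors' were already folded
into the message, and it iterates over a copy of the items so keys can be
dropped while looping (the commented-out draft in the patch iterates the dict
directly, which yields only keys):

    def append_field_errors(message, json_body):
        """Append any remaining per-field API errors to an error message."""
        for key, value in list(json_body.items()):
            if key in ('detail', 'non_field_errors'):
                continue  # assumed to be handled earlier in SolveError
            if isinstance(value, list):
                message += ' %s Errors: %s' % (key, ', '.join(str(v) for v in value))
            else:
                message += ' %s Errors: %s' % (key, value)
            del json_body[key]
        return message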