From 2838f27cd9a231fb44bd53a86bed25c8f13e7993 Mon Sep 17 00:00:00 2001 From: Gavin Date: Mon, 6 Apr 2020 14:09:32 -0700 Subject: [PATCH] Use link class in db layer Simplifies interface & code --- .../core/storage/arango_sample_storage.py | 70 +++++------ .../storage/arango_sample_storage_test.py | 117 +++++++++--------- 2 files changed, 85 insertions(+), 102 deletions(-) diff --git a/lib/SampleService/core/storage/arango_sample_storage.py b/lib/SampleService/core/storage/arango_sample_storage.py index 8e74f95f..321e6a9d 100644 --- a/lib/SampleService/core/storage/arango_sample_storage.py +++ b/lib/SampleService/core/storage/arango_sample_storage.py @@ -79,11 +79,11 @@ from SampleService.core.acls import SampleACL from SampleService.core.core_types import PrimitiveType as _PrimitiveType -from SampleService.core.sample import SavedSample, SampleNodeAddress +from SampleService.core.data_link import DataLink +from SampleService.core.sample import SavedSample from SampleService.core.sample import SampleNode as _SampleNode, SubSampleType as _SubSampleType from SampleService.core.arg_checkers import not_falsy as _not_falsy from SampleService.core.arg_checkers import check_string as _check_string -from SampleService.core.arg_checkers import check_timestamp as _check_timestamp from SampleService.core.errors import ConcurrencyError as _ConcurrencyError from SampleService.core.errors import DataLinkExistsError as _DataLinkExistsError from SampleService.core.errors import NoSuchSampleError as _NoSuchSampleError @@ -756,11 +756,7 @@ def replace_sample_acls(self, id_: UUID, acls: SampleACL): # TODO change acls with more granularity - def link_workspace_data( - self, - duid: DataUnitID, - sample_node_address: SampleNodeAddress, - timestamp: datetime.datetime): + def link_workspace_data(self, link: DataLink): ''' Link data in the workspace to a sample. Each data unit can be linked to only one sample at a time. Expired links may exist to @@ -769,10 +765,7 @@ def link_workspace_data( No checking is done on whether the user has permissions to link the data or whether the data or sample node exists. - :param duid: The workspace data unit to link to the sample. - :param sample_node_address: The sample node to which the data will be linked. - :param timestamp: The current timestamp. This will be used to set the creation time on the - link. + :param link: the link to save. :raises NoSuchSampleError: if the sample does not exist. :raises NoSuchSampleVersionError: if the sample version does not exist. @@ -802,11 +795,11 @@ def link_workspace_data( # Since _keys have a maxium length of 254 chars and the dataid of the DUID may be up to # 256 characters and may contain illegal characters, it is MD5'd. See # https://www.arangodb.com/docs/stable/data-modeling-naming-conventions-document-keys.html - sna = sample_node_address - del sample_node_address - _not_falsy(duid, 'duid') - _not_falsy(sna, 'sample_node_address') - _check_timestamp(timestamp, 'timestamp') + + # should make a link ID? UUID? Maybe not necessary? Effectively DUID since 1:1 + + _not_falsy(link, 'link') + sna = link.sample_node_address # need to get the version doc to ensure the documents have been updated appropriately, # see comments at beginning of file _, versiondoc, _ = self._get_sample_and_version_doc(sna.sampleid, sna.version) @@ -823,20 +816,20 @@ def link_workspace_data( try: # makes a collection specifically for this transaction tdlc = tdb.collection(self._col_data_link.name) - if self._has_doc(tdlc, self._create_link_key(duid)): - raise _DataLinkExistsError(str(duid)) + if self._has_doc(tdlc, self._create_link_key(link.duid)): + raise _DataLinkExistsError(str(link.duid)) # TODO DATALINK should only count links coexisting with current link when expiration # works - if self._count_links_from_ws_object(tdb, duid.upa) >= self._max_links: + if self._count_links_from_ws_object(tdb, link.duid.upa) >= self._max_links: raise _TooManyDataLinksError( - f'More than {self._max_links} links from workpace object {duid.upa}') + f'More than {self._max_links} links from workpace object {link.duid.upa}') if self._count_links_from_sample_ver(tdb, sna.sampleid, samplever) >= self._max_links: raise _TooManyDataLinksError( f'More than {self._max_links} links from sample {sna.sampleid} ' + f'version {sna.version}') - ldoc = self._create_link_doc(duid, sna.sampleid, samplever, sna.node, timestamp) + ldoc = self._create_link_doc(link, samplever) self._insert(tdlc, ldoc) try: tdb.commit_transaction() @@ -852,9 +845,6 @@ def link_workspace_data( # connection is hosed raise _SampleStorageError('Connection to database failed: ' + str(e)) from e - # TODO DATALINK return link object - # TODO DATALINK make a link ID? UUID? Maybe not necessary? Effectively DUID since 1:1 - def _has_doc(self, col, id_): # may want exception thrown at some point? try: @@ -904,29 +894,25 @@ def _create_link_key(self, duid: DataUnitID): dataid = f'_{self._md5(duid.dataid)}' if duid.dataid else '' return f'{duid.upa.wsid}_{duid.upa.objid}_{duid.upa.version}{dataid}' - def _create_link_doc( - self, - duid: DataUnitID, - sampleid: UUID, - samplever: UUID, - node: str, - timestamp: datetime.datetime): - nodeid = self._get_node_id(sampleid, samplever, node) + def _create_link_doc(self, link: DataLink, samplever: UUID): + sna = link.sample_node_address + upa = link.duid.upa + nodeid = self._get_node_id(sna.sampleid, samplever, sna.node) # see https://github.com/kbase/relation_engine_spec/blob/4a9dc6df2088763a9df88f0b018fa5c64f2935aa/schemas/ws/ws_object_version.yaml#L17 # noqa - from_ = f'{self._col_ws.name}/{duid.upa.wsid}:{duid.upa.objid}:{duid.upa.version}' + from_ = f'{self._col_ws.name}/{upa.wsid}:{upa.objid}:{upa.version}' return { - _FLD_ARANGO_KEY: self._create_link_key(duid), + _FLD_ARANGO_KEY: self._create_link_key(link.duid), _FLD_ARANGO_FROM: from_, _FLD_ARANGO_TO: f'{self._col_nodes.name}/{nodeid}', - _FLD_LINK_CREATED: timestamp.timestamp(), - _FLD_LINK_EXPIRES: _MAX_ADB_INTEGER, - _FLD_LINK_WORKSPACE_ID: duid.upa.wsid, - _FLD_LINK_OBJECT_ID: duid.upa.objid, - _FLD_LINK_OBJECT_VERSION: duid.upa.version, - _FLD_LINK_OBJECT_DATA_UNIT: duid.dataid, - _FLD_LINK_SAMPLE_ID: str(sampleid), + _FLD_LINK_CREATED: link.create.timestamp(), + _FLD_LINK_EXPIRES: link.expire.timestamp() if link.expire else _MAX_ADB_INTEGER, + _FLD_LINK_WORKSPACE_ID: upa.wsid, + _FLD_LINK_OBJECT_ID: upa.objid, + _FLD_LINK_OBJECT_VERSION: upa.version, + _FLD_LINK_OBJECT_DATA_UNIT: link.duid.dataid, + _FLD_LINK_SAMPLE_ID: str(sna.sampleid), _FLD_LINK_SAMPLE_VERSION: str(samplever), - _FLD_LINK_SAMPLE_NODE: node + _FLD_LINK_SAMPLE_NODE: sna.node } diff --git a/test/core/storage/arango_sample_storage_test.py b/test/core/storage/arango_sample_storage_test.py index 12171f50..514be757 100644 --- a/test/core/storage/arango_sample_storage_test.py +++ b/test/core/storage/arango_sample_storage_test.py @@ -7,6 +7,7 @@ from core.test_utils import assert_exception_correct from arango_controller import ArangoController from SampleService.core.acls import SampleACL +from SampleService.core.data_link import DataLink from SampleService.core.sample import SavedSample, SampleNode, SubSampleType, SampleNodeAddress from SampleService.core.sample import SampleAddress from SampleService.core.errors import MissingParameterError, NoSuchSampleError, ConcurrencyError @@ -1073,31 +1074,32 @@ def test_ws_data_link(samplestorage): assert samplestorage.save_sample( SavedSample(id2, 'user', [SampleNode('mynode2')], dt(3), 'foo')) is True - samplestorage.link_workspace_data( + samplestorage.link_workspace_data(DataLink( DataUnitID(UPA('5/89/32')), SampleNodeAddress(SampleAddress(id1, 2), 'mynode1'), - dt(500) + dt(500)) ) # test different workspace object and different sample version - samplestorage.link_workspace_data( + samplestorage.link_workspace_data(DataLink( DataUnitID(UPA('42/42/42'), 'dataunit1'), SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), - dt(600) + dt(600)) ) - # test data unit vs just UPA and different sample - samplestorage.link_workspace_data( + # test data unit vs just UPA, different sample, and expiration date + samplestorage.link_workspace_data(DataLink( DataUnitID(UPA('5/89/32'), 'dataunit2'), SampleNodeAddress(SampleAddress(id2, 1), 'mynode2'), - dt(700) + dt(700), + dt(30000)) ) # test data units don't collide if they have different names - samplestorage.link_workspace_data( + samplestorage.link_workspace_data(DataLink( DataUnitID(UPA('5/89/32'), 'dataunit1'), SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), - dt(800) + dt(800)) ) # this is naughty @@ -1161,7 +1163,7 @@ def test_ws_data_link(samplestorage): 'sver': verdoc3['uuidver'], 'node': 'mynode2', 'create': 700, - 'expire': 9007199254740991 + 'expire': 30000 } link4 = samplestorage._col_data_link.get('5_89_32_bc7324de86d54718dd0dc29c55c6d53a') @@ -1206,10 +1208,10 @@ def test_ws_data_link_correct_missing_versions(samplestorage): samplestorage._col_version.update_match({}, {'ver': -1}) samplestorage._col_nodes.update_match({'name': 'kid2'}, {'ver': -1}) - samplestorage.link_workspace_data( + samplestorage.link_workspace_data(DataLink( DataUnitID(UPA('5/89/32')), SampleNodeAddress(SampleAddress(id_, 1), 'kid1'), - dt(500) + dt(500)) ) assert samplestorage._col_version.count() == 1 @@ -1224,20 +1226,9 @@ def test_ws_data_link_correct_missing_versions(samplestorage): assert v['ver'] == 1 -def test_ws_data_link_bad_args(samplestorage): - ss = samplestorage - id1 = uuid.UUID('1234567890abcdef1234567890abcdef') - d = DataUnitID(UPA('1/1/1')) - s = SampleNodeAddress(SampleAddress(id1, 1), 'foo') - t = dt(1) - _ws_data_link_fail(ss, None, s, t, ValueError( - 'duid cannot be a value that evaluates to false')) - _ws_data_link_fail(ss, d, None, t, ValueError( - 'sample_node_address cannot be a value that evaluates to false')) - _ws_data_link_fail(ss, d, s, None, ValueError( - 'timestamp cannot be a value that evaluates to false')) - _ws_data_link_fail(ss, d, s, datetime.datetime.now(), ValueError( - 'timestamp cannot be a naive datetime')) +def test_ws_data_link_no_link(samplestorage): + _ws_data_link_fail(samplestorage, None, ValueError( + 'link cannot be a value that evaluates to false')) def test_ws_data_link_no_sample(samplestorage): @@ -1248,9 +1239,10 @@ def test_ws_data_link_no_sample(samplestorage): _ws_data_link_fail( samplestorage, - DataUnitID(UPA('1/1/1')), - SampleNodeAddress(SampleAddress(id2, 1), 'mynode'), - dt(1), + DataLink( + DataUnitID(UPA('1/1/1')), + SampleNodeAddress(SampleAddress(id2, 1), 'mynode'), + dt(1)), NoSuchSampleError(str(id2)) ) @@ -1264,9 +1256,10 @@ def test_ws_data_link_no_sample_version(samplestorage): _ws_data_link_fail( samplestorage, - DataUnitID(UPA('1/1/1')), - SampleNodeAddress(SampleAddress(id1, 3), 'mynode'), - dt(1), + DataLink( + DataUnitID(UPA('1/1/1')), + SampleNodeAddress(SampleAddress(id1, 3), 'mynode'), + dt(1)), NoSuchSampleVersionError('12345678-90ab-cdef-1234-567890abcdef ver 3') ) @@ -1276,31 +1269,33 @@ def test_ws_data_link_link_exists(samplestorage): assert samplestorage.save_sample( SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True - samplestorage.link_workspace_data( + samplestorage.link_workspace_data(DataLink( DataUnitID(UPA('1/1/1')), SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), - dt(500) + dt(500)) ) - samplestorage.link_workspace_data( + samplestorage.link_workspace_data(DataLink( DataUnitID(UPA('1/1/1'), 'du1'), SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), - dt(500) + dt(500)) ) _ws_data_link_fail( samplestorage, - DataUnitID(UPA('1/1/1')), - SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), - dt(1), + DataLink( + DataUnitID(UPA('1/1/1')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(1)), DataLinkExistsError('1/1/1') ) _ws_data_link_fail( samplestorage, - DataUnitID(UPA('1/1/1'), 'du1'), - SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), - dt(1), + DataLink( + DataUnitID(UPA('1/1/1'), 'du1'), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(1)), DataLinkExistsError('1/1/1:du1') ) @@ -1315,29 +1310,30 @@ def test_ws_data_link_too_many_links_from_ws_obj(samplestorage): assert ss.save_sample( SavedSample(id2, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True - ss.link_workspace_data( + ss.link_workspace_data(DataLink( DataUnitID(UPA('1/1/1')), SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), - dt(500) + dt(500)) ) - ss.link_workspace_data( + ss.link_workspace_data(DataLink( DataUnitID(UPA('1/1/1'), '1'), SampleNodeAddress(SampleAddress(id2, 1), 'mynode'), - dt(500) + dt(500)) ) - ss.link_workspace_data( + ss.link_workspace_data(DataLink( DataUnitID(UPA('1/1/1'), '2'), SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), - dt(500) + dt(500)) ) _ws_data_link_fail( ss, - DataUnitID(UPA('1/1/1'), '3'), - SampleNodeAddress(SampleAddress(id2, 1), 'mynode'), - dt(1), + DataLink( + DataUnitID(UPA('1/1/1'), '3'), + SampleNodeAddress(SampleAddress(id2, 1), 'mynode'), + dt(1)), TooManyDataLinksError('More than 3 links from workpace object 1/1/1') ) @@ -1350,23 +1346,24 @@ def test_ws_data_link_too_many_links_from_sample_ver(samplestorage): SavedSample( id1, 'user', [SampleNode('mynode'), SampleNode('mynode2')], dt(1), 'foo')) is True - ss.link_workspace_data( + ss.link_workspace_data(DataLink( DataUnitID(UPA('1/1/1')), SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), - dt(500) + dt(500)) ) - ss.link_workspace_data( + ss.link_workspace_data(DataLink( DataUnitID(UPA('1/1/2')), SampleNodeAddress(SampleAddress(id1, 1), 'mynode2'), - dt(500) + dt(500)) ) _ws_data_link_fail( ss, - DataUnitID(UPA('1/1/3')), - SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), - dt(1), + DataLink( + DataUnitID(UPA('1/1/3')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(1)), TooManyDataLinksError( 'More than 2 links from sample 12345678-90ab-cdef-1234-567890abcdef version 1') ) @@ -1387,7 +1384,7 @@ def _samplestorage_with_max_links(samplestorage, max_links): max_links=max_links) -def _ws_data_link_fail(samplestorage, duid, sna, ts, expected): +def _ws_data_link_fail(samplestorage, link, expected): with raises(Exception) as got: - samplestorage.link_workspace_data(duid, sna, ts) + samplestorage.link_workspace_data(link) assert_exception_correct(got.value, expected)