diff --git a/lib/SampleService/core/storage/arango_sample_storage.py b/lib/SampleService/core/storage/arango_sample_storage.py index 321e6a9d..b9c6eb23 100644 --- a/lib/SampleService/core/storage/arango_sample_storage.py +++ b/lib/SampleService/core/storage/arango_sample_storage.py @@ -819,12 +819,12 @@ def link_workspace_data(self, link: DataLink): if self._has_doc(tdlc, self._create_link_key(link.duid)): raise _DataLinkExistsError(str(link.duid)) - # TODO DATALINK should only count links coexisting with current link when expiration - # works - if self._count_links_from_ws_object(tdb, link.duid.upa) >= self._max_links: + if self._count_links_from_ws_object( + tdb, link.duid.upa, link.create, link.expire) >= self._max_links: raise _TooManyDataLinksError( f'More than {self._max_links} links from workpace object {link.duid.upa}') - if self._count_links_from_sample_ver(tdb, sna.sampleid, samplever) >= self._max_links: + if self._count_links_from_sample_ver( + tdb, sna.sampleid, samplever, link.create, link.expire) >= self._max_links: raise _TooManyDataLinksError( f'More than {self._max_links} links from sample {sna.sampleid} ' + f'version {sna.version}') @@ -852,40 +852,59 @@ def _has_doc(self, col, id_): except _arango.exceptions.DocumentInError as e: # this is a pain to test raise _SampleStorageError('Connection to database failed: ' + str(e)) from e - def _count_links_from_ws_object(self, db, upa: _UPA): + def _count_links_from_ws_object( + self, + db, + upa: _UPA, + create: datetime.datetime, + expire: _Optional[datetime.datetime]): wsc = self._count_links( db, f''' - FILTER d.{_FLD_LINK_WORKSPACE_ID} == @wsid - FILTER d.{_FLD_LINK_OBJECT_ID} == @objid - FILTER d.{_FLD_LINK_OBJECT_VERSION} == @ver - ''', - {'wsid': upa.wsid, 'objid': upa.objid, 'ver': upa.version}) + FILTER d.{_FLD_LINK_WORKSPACE_ID} == @wsid + FILTER d.{_FLD_LINK_OBJECT_ID} == @objid + FILTER d.{_FLD_LINK_OBJECT_VERSION} == @ver''', + {'wsid': upa.wsid, 'objid': upa.objid, 'ver': upa.version}, + create, + expire) return wsc - def _count_links_from_sample_ver(self, db, sample: UUID, version: UUID): + def _count_links_from_sample_ver( + self, + db, + sample: UUID, + version: UUID, + create: datetime.datetime, + expire: _Optional[datetime.datetime]): sv = self._count_links( db, f''' - FILTER d.{_FLD_LINK_SAMPLE_ID} == @sid - FILTER d.{_FLD_LINK_SAMPLE_VERSION} == @sver - ''', - {'sid': str(sample), 'sver': str(version)}) + FILTER d.{_FLD_LINK_SAMPLE_ID} == @sid + FILTER d.{_FLD_LINK_SAMPLE_VERSION} == @sver''', + {'sid': str(sample), 'sver': str(version)}, + create, + expire) return sv - def _count_links(self, db, filters: str, bind_vars): + def _count_links(self, db, filters: str, bind_vars, create, expire): bind_vars['@col'] = self._col_data_link.name + bind_vars['create'] = create.timestamp() + bind_vars['expire'] = expire = expire.timestamp() if expire else _MAX_ADB_INTEGER + # might need to include create / expire in compound indexes if we get a ton of expired + # links. Might not work in a NOT though. Alternate formulation is + # (d.create >= @create AND d.create <= @expire) OR + # (d.expire >= @create AND d.expire <= @expire) + q = (f''' + FOR d in @@col + ''' + + filters + + ''' + FILTER NOT (d.expire < @create OR d.create > @expire) + COLLECT WITH COUNT INTO linkcount + RETURN linkcount + ''') try: - cur = db.aql.execute( - f''' - FOR d in @@col - ''' + - filters + - ''' - COLLECT WITH COUNT INTO linkcount - RETURN linkcount - ''', - bind_vars=bind_vars) + cur = db.aql.execute(q, bind_vars=bind_vars) return cur.next() except _arango.exceptions.AQLQueryExecuteError as e: # this is a pain to test raise _SampleStorageError('Connection to database failed: ' + str(e)) from e diff --git a/test/core/storage/arango_sample_storage_test.py b/test/core/storage/arango_sample_storage_test.py index 514be757..1351349e 100644 --- a/test/core/storage/arango_sample_storage_test.py +++ b/test/core/storage/arango_sample_storage_test.py @@ -1226,12 +1226,12 @@ def test_ws_data_link_correct_missing_versions(samplestorage): assert v['ver'] == 1 -def test_ws_data_link_no_link(samplestorage): +def test_ws_data_link_fail_no_link(samplestorage): _ws_data_link_fail(samplestorage, None, ValueError( 'link cannot be a value that evaluates to false')) -def test_ws_data_link_no_sample(samplestorage): +def test_ws_data_link_fail_no_sample(samplestorage): id1 = uuid.UUID('1234567890abcdef1234567890abcdef') id2 = uuid.UUID('1234567890abcdef1234567890abcdee') assert samplestorage.save_sample( @@ -1247,7 +1247,7 @@ def test_ws_data_link_no_sample(samplestorage): ) -def test_ws_data_link_no_sample_version(samplestorage): +def test_ws_data_link_fail_no_sample_version(samplestorage): id1 = uuid.UUID('1234567890abcdef1234567890abcdef') assert samplestorage.save_sample( SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True @@ -1264,7 +1264,7 @@ def test_ws_data_link_no_sample_version(samplestorage): ) -def test_ws_data_link_link_exists(samplestorage): +def test_ws_data_link_fail_link_exists(samplestorage): id1 = uuid.UUID('1234567890abcdef1234567890abcdef') assert samplestorage.save_sample( SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True @@ -1300,7 +1300,7 @@ def test_ws_data_link_link_exists(samplestorage): ) -def test_ws_data_link_too_many_links_from_ws_obj(samplestorage): +def test_ws_data_link_fail_too_many_links_from_ws_obj_basic(samplestorage): ss = _samplestorage_with_max_links(samplestorage, 3) id1 = uuid.UUID('1234567890abcdef1234567890abcdef') @@ -1338,7 +1338,7 @@ def test_ws_data_link_too_many_links_from_ws_obj(samplestorage): ) -def test_ws_data_link_too_many_links_from_sample_ver(samplestorage): +def test_ws_data_link_fail_too_many_links_from_sample_ver_basic(samplestorage): ss = _samplestorage_with_max_links(samplestorage, 2) id1 = uuid.UUID('1234567890abcdef1234567890abcdef') @@ -1369,6 +1369,175 @@ def test_ws_data_link_too_many_links_from_sample_ver(samplestorage): ) +def test_ws_data_link_fail_too_many_links_from_ws_obj_time_travel(samplestorage): + # tests that links that do not co-exist with the new link are not counted against the total. + ss = _samplestorage_with_max_links(samplestorage, 6) + + id1 = uuid.UUID('1234567890abcdef1234567890abcdef') + assert ss.save_sample( + SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True + assert ss.save_sample_version( + SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) == 2 + + # completely outside the new sample time range. + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/1')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(100), + dt(299)) + ) + + # expire matches create + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/1'), '1'), + SampleNodeAddress(SampleAddress(id1, 2), 'mynode'), + dt(100), + dt(300)) + ) + + # overlaps create + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/1'), '2'), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(250), + dt(350)) + ) + + # contained inside + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/1'), '3'), + SampleNodeAddress(SampleAddress(id1, 2), 'mynode'), + dt(325), + dt(375)) + ) + + # encloses + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/1'), '4'), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(250), + dt(450)) + ) + + # overlaps expire + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/1'), '5'), + SampleNodeAddress(SampleAddress(id1, 2), 'mynode'), + dt(350), + dt(450)) + ) + + # create matches expire + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/1'), '6'), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(400), + dt(500)) + ) + + # completely outside the new sample time range. + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/1'), '7'), + SampleNodeAddress(SampleAddress(id1, 2), 'mynode'), + dt(401), + dt(550)) + ) + + _ws_data_link_fail( + ss, + DataLink( + DataUnitID(UPA('1/1/1'), '8'), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(300), + dt(400)), + TooManyDataLinksError('More than 6 links from workpace object 1/1/1') + ) + + +def test_ws_data_link_fail_too_many_links_from_sample_ver_time_travel(samplestorage): + # tests that links that do not co-exist with the new link are not counted against the total. + ss = _samplestorage_with_max_links(samplestorage, 6) + + id1 = uuid.UUID('1234567890abcdef1234567890abcdef') + assert ss.save_sample( + SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True + + # completely outside the new sample time range. + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/1')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(100), + dt(299)) + ) + + # expire matches create + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/2')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(100), + dt(300)) + ) + + # overlaps create + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/3')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(250), + dt(350)) + ) + + # contained inside + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/4')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(325), + dt(375)) + ) + + # encloses + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/5')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(250), + dt(450)) + ) + + # overlaps expire + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/6')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(350), + dt(450)) + ) + + # create matches expire + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/7')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(400), + dt(500)) + ) + + # completely outside the new sample time range. + ss.link_workspace_data(DataLink( + DataUnitID(UPA('1/1/8')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(401), + dt(550)) + ) + + _ws_data_link_fail( + ss, + DataLink( + DataUnitID(UPA('1/1/9')), + SampleNodeAddress(SampleAddress(id1, 1), 'mynode'), + dt(300), + dt(400)), + TooManyDataLinksError('More than 6 links from sample ' + + '12345678-90ab-cdef-1234-567890abcdef version 1') + ) + + def _samplestorage_with_max_links(samplestorage, max_links): # this is very naughty return ArangoSampleStorage(