Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 45 additions & 26 deletions lib/SampleService/core/storage/arango_sample_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,12 +819,12 @@ def link_workspace_data(self, link: DataLink):
if self._has_doc(tdlc, self._create_link_key(link.duid)):
raise _DataLinkExistsError(str(link.duid))

# TODO DATALINK should only count links coexisting with current link when expiration
# works
if self._count_links_from_ws_object(tdb, link.duid.upa) >= self._max_links:
if self._count_links_from_ws_object(
tdb, link.duid.upa, link.create, link.expire) >= self._max_links:
raise _TooManyDataLinksError(
f'More than {self._max_links} links from workpace object {link.duid.upa}')
if self._count_links_from_sample_ver(tdb, sna.sampleid, samplever) >= self._max_links:
if self._count_links_from_sample_ver(
tdb, sna.sampleid, samplever, link.create, link.expire) >= self._max_links:
raise _TooManyDataLinksError(
f'More than {self._max_links} links from sample {sna.sampleid} ' +
f'version {sna.version}')
Expand Down Expand Up @@ -852,40 +852,59 @@ def _has_doc(self, col, id_):
except _arango.exceptions.DocumentInError as e: # this is a pain to test
raise _SampleStorageError('Connection to database failed: ' + str(e)) from e

def _count_links_from_ws_object(self, db, upa: _UPA):
def _count_links_from_ws_object(
self,
db,
upa: _UPA,
create: datetime.datetime,
expire: _Optional[datetime.datetime]):
wsc = self._count_links(
db,
f'''
FILTER d.{_FLD_LINK_WORKSPACE_ID} == @wsid
FILTER d.{_FLD_LINK_OBJECT_ID} == @objid
FILTER d.{_FLD_LINK_OBJECT_VERSION} == @ver
''',
{'wsid': upa.wsid, 'objid': upa.objid, 'ver': upa.version})
FILTER d.{_FLD_LINK_WORKSPACE_ID} == @wsid
FILTER d.{_FLD_LINK_OBJECT_ID} == @objid
FILTER d.{_FLD_LINK_OBJECT_VERSION} == @ver''',
{'wsid': upa.wsid, 'objid': upa.objid, 'ver': upa.version},
create,
expire)
return wsc

def _count_links_from_sample_ver(self, db, sample: UUID, version: UUID):
def _count_links_from_sample_ver(
self,
db,
sample: UUID,
version: UUID,
create: datetime.datetime,
expire: _Optional[datetime.datetime]):
sv = self._count_links(
db,
f'''
FILTER d.{_FLD_LINK_SAMPLE_ID} == @sid
FILTER d.{_FLD_LINK_SAMPLE_VERSION} == @sver
''',
{'sid': str(sample), 'sver': str(version)})
FILTER d.{_FLD_LINK_SAMPLE_ID} == @sid
FILTER d.{_FLD_LINK_SAMPLE_VERSION} == @sver''',
{'sid': str(sample), 'sver': str(version)},
create,
expire)
return sv

def _count_links(self, db, filters: str, bind_vars):
def _count_links(self, db, filters: str, bind_vars, create, expire):
bind_vars['@col'] = self._col_data_link.name
bind_vars['create'] = create.timestamp()
bind_vars['expire'] = expire = expire.timestamp() if expire else _MAX_ADB_INTEGER
# might need to include create / expire in compound indexes if we get a ton of expired
# links. Might not work in a NOT though. Alternate formulation is
# (d.create >= @create AND d.create <= @expire) OR
# (d.expire >= @create AND d.expire <= @expire)
q = (f'''
FOR d in @@col
''' +
filters +
'''
FILTER NOT (d.expire < @create OR d.create > @expire)
COLLECT WITH COUNT INTO linkcount
RETURN linkcount
''')
try:
cur = db.aql.execute(
f'''
FOR d in @@col
''' +
filters +
'''
COLLECT WITH COUNT INTO linkcount
RETURN linkcount
''',
bind_vars=bind_vars)
cur = db.aql.execute(q, bind_vars=bind_vars)
return cur.next()
except _arango.exceptions.AQLQueryExecuteError as e: # this is a pain to test
raise _SampleStorageError('Connection to database failed: ' + str(e)) from e
Expand Down
181 changes: 175 additions & 6 deletions test/core/storage/arango_sample_storage_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1226,12 +1226,12 @@ def test_ws_data_link_correct_missing_versions(samplestorage):
assert v['ver'] == 1


def test_ws_data_link_no_link(samplestorage):
def test_ws_data_link_fail_no_link(samplestorage):
_ws_data_link_fail(samplestorage, None, ValueError(
'link cannot be a value that evaluates to false'))


def test_ws_data_link_no_sample(samplestorage):
def test_ws_data_link_fail_no_sample(samplestorage):
id1 = uuid.UUID('1234567890abcdef1234567890abcdef')
id2 = uuid.UUID('1234567890abcdef1234567890abcdee')
assert samplestorage.save_sample(
Expand All @@ -1247,7 +1247,7 @@ def test_ws_data_link_no_sample(samplestorage):
)


def test_ws_data_link_no_sample_version(samplestorage):
def test_ws_data_link_fail_no_sample_version(samplestorage):
id1 = uuid.UUID('1234567890abcdef1234567890abcdef')
assert samplestorage.save_sample(
SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True
Expand All @@ -1264,7 +1264,7 @@ def test_ws_data_link_no_sample_version(samplestorage):
)


def test_ws_data_link_link_exists(samplestorage):
def test_ws_data_link_fail_link_exists(samplestorage):
id1 = uuid.UUID('1234567890abcdef1234567890abcdef')
assert samplestorage.save_sample(
SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True
Expand Down Expand Up @@ -1300,7 +1300,7 @@ def test_ws_data_link_link_exists(samplestorage):
)


def test_ws_data_link_too_many_links_from_ws_obj(samplestorage):
def test_ws_data_link_fail_too_many_links_from_ws_obj_basic(samplestorage):
ss = _samplestorage_with_max_links(samplestorage, 3)

id1 = uuid.UUID('1234567890abcdef1234567890abcdef')
Expand Down Expand Up @@ -1338,7 +1338,7 @@ def test_ws_data_link_too_many_links_from_ws_obj(samplestorage):
)


def test_ws_data_link_too_many_links_from_sample_ver(samplestorage):
def test_ws_data_link_fail_too_many_links_from_sample_ver_basic(samplestorage):
ss = _samplestorage_with_max_links(samplestorage, 2)

id1 = uuid.UUID('1234567890abcdef1234567890abcdef')
Expand Down Expand Up @@ -1369,6 +1369,175 @@ def test_ws_data_link_too_many_links_from_sample_ver(samplestorage):
)


def test_ws_data_link_fail_too_many_links_from_ws_obj_time_travel(samplestorage):
# tests that links that do not co-exist with the new link are not counted against the total.
ss = _samplestorage_with_max_links(samplestorage, 6)

id1 = uuid.UUID('1234567890abcdef1234567890abcdef')
assert ss.save_sample(
SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True
assert ss.save_sample_version(
SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) == 2

# completely outside the new sample time range.
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(100),
dt(299))
)

# expire matches create
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1'), '1'),
SampleNodeAddress(SampleAddress(id1, 2), 'mynode'),
dt(100),
dt(300))
)

# overlaps create
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1'), '2'),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(250),
dt(350))
)

# contained inside
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1'), '3'),
SampleNodeAddress(SampleAddress(id1, 2), 'mynode'),
dt(325),
dt(375))
)

# encloses
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1'), '4'),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(250),
dt(450))
)

# overlaps expire
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1'), '5'),
SampleNodeAddress(SampleAddress(id1, 2), 'mynode'),
dt(350),
dt(450))
)

# create matches expire
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1'), '6'),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(400),
dt(500))
)

# completely outside the new sample time range.
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1'), '7'),
SampleNodeAddress(SampleAddress(id1, 2), 'mynode'),
dt(401),
dt(550))
)

_ws_data_link_fail(
ss,
DataLink(
DataUnitID(UPA('1/1/1'), '8'),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(300),
dt(400)),
TooManyDataLinksError('More than 6 links from workpace object 1/1/1')
)


def test_ws_data_link_fail_too_many_links_from_sample_ver_time_travel(samplestorage):
# tests that links that do not co-exist with the new link are not counted against the total.
ss = _samplestorage_with_max_links(samplestorage, 6)

id1 = uuid.UUID('1234567890abcdef1234567890abcdef')
assert ss.save_sample(
SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True

# completely outside the new sample time range.
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(100),
dt(299))
)

# expire matches create
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/2')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(100),
dt(300))
)

# overlaps create
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/3')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(250),
dt(350))
)

# contained inside
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/4')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(325),
dt(375))
)

# encloses
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/5')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(250),
dt(450))
)

# overlaps expire
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/6')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(350),
dt(450))
)

# create matches expire
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/7')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(400),
dt(500))
)

# completely outside the new sample time range.
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/8')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(401),
dt(550))
)

_ws_data_link_fail(
ss,
DataLink(
DataUnitID(UPA('1/1/9')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(300),
dt(400)),
TooManyDataLinksError('More than 6 links from sample ' +
'12345678-90ab-cdef-1234-567890abcdef version 1')
)


def _samplestorage_with_max_links(samplestorage, max_links):
# this is very naughty
return ArangoSampleStorage(
Expand Down