Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 28 additions & 42 deletions lib/SampleService/core/storage/arango_sample_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@

from SampleService.core.acls import SampleACL
from SampleService.core.core_types import PrimitiveType as _PrimitiveType
from SampleService.core.sample import SavedSample, SampleNodeAddress
from SampleService.core.data_link import DataLink
from SampleService.core.sample import SavedSample
from SampleService.core.sample import SampleNode as _SampleNode, SubSampleType as _SubSampleType
from SampleService.core.arg_checkers import not_falsy as _not_falsy
from SampleService.core.arg_checkers import check_string as _check_string
from SampleService.core.arg_checkers import check_timestamp as _check_timestamp
from SampleService.core.errors import ConcurrencyError as _ConcurrencyError
from SampleService.core.errors import DataLinkExistsError as _DataLinkExistsError
from SampleService.core.errors import NoSuchSampleError as _NoSuchSampleError
Expand Down Expand Up @@ -756,11 +756,7 @@ def replace_sample_acls(self, id_: UUID, acls: SampleACL):

# TODO change acls with more granularity

def link_workspace_data(
self,
duid: DataUnitID,
sample_node_address: SampleNodeAddress,
timestamp: datetime.datetime):
def link_workspace_data(self, link: DataLink):
'''
Link data in the workspace to a sample.
Each data unit can be linked to only one sample at a time. Expired links may exist to
Expand All @@ -769,10 +765,7 @@ def link_workspace_data(
No checking is done on whether the user has permissions to link the data or whether the
data or sample node exists.

:param duid: The workspace data unit to link to the sample.
:param sample_node_address: The sample node to which the data will be linked.
:param timestamp: The current timestamp. This will be used to set the creation time on the
link.
:param link: the link to save.

:raises NoSuchSampleError: if the sample does not exist.
:raises NoSuchSampleVersionError: if the sample version does not exist.
Expand Down Expand Up @@ -802,11 +795,11 @@ def link_workspace_data(
# Since _keys have a maxium length of 254 chars and the dataid of the DUID may be up to
# 256 characters and may contain illegal characters, it is MD5'd. See
# https://www.arangodb.com/docs/stable/data-modeling-naming-conventions-document-keys.html
sna = sample_node_address
del sample_node_address
_not_falsy(duid, 'duid')
_not_falsy(sna, 'sample_node_address')
_check_timestamp(timestamp, 'timestamp')

# should make a link ID? UUID? Maybe not necessary? Effectively DUID since 1:1

_not_falsy(link, 'link')
sna = link.sample_node_address
# need to get the version doc to ensure the documents have been updated appropriately,
# see comments at beginning of file
_, versiondoc, _ = self._get_sample_and_version_doc(sna.sampleid, sna.version)
Expand All @@ -823,20 +816,20 @@ def link_workspace_data(
try:
# makes a collection specifically for this transaction
tdlc = tdb.collection(self._col_data_link.name)
if self._has_doc(tdlc, self._create_link_key(duid)):
raise _DataLinkExistsError(str(duid))
if self._has_doc(tdlc, self._create_link_key(link.duid)):
raise _DataLinkExistsError(str(link.duid))

# TODO DATALINK should only count links coexisting with current link when expiration
# works
if self._count_links_from_ws_object(tdb, duid.upa) >= self._max_links:
if self._count_links_from_ws_object(tdb, link.duid.upa) >= self._max_links:
raise _TooManyDataLinksError(
f'More than {self._max_links} links from workpace object {duid.upa}')
f'More than {self._max_links} links from workpace object {link.duid.upa}')
if self._count_links_from_sample_ver(tdb, sna.sampleid, samplever) >= self._max_links:
raise _TooManyDataLinksError(
f'More than {self._max_links} links from sample {sna.sampleid} ' +
f'version {sna.version}')

ldoc = self._create_link_doc(duid, sna.sampleid, samplever, sna.node, timestamp)
ldoc = self._create_link_doc(link, samplever)
self._insert(tdlc, ldoc)
try:
tdb.commit_transaction()
Expand All @@ -852,9 +845,6 @@ def link_workspace_data(
# connection is hosed
raise _SampleStorageError('Connection to database failed: ' + str(e)) from e

# TODO DATALINK return link object
# TODO DATALINK make a link ID? UUID? Maybe not necessary? Effectively DUID since 1:1

def _has_doc(self, col, id_):
# may want exception thrown at some point?
try:
Expand Down Expand Up @@ -904,29 +894,25 @@ def _create_link_key(self, duid: DataUnitID):
dataid = f'_{self._md5(duid.dataid)}' if duid.dataid else ''
return f'{duid.upa.wsid}_{duid.upa.objid}_{duid.upa.version}{dataid}'

def _create_link_doc(
self,
duid: DataUnitID,
sampleid: UUID,
samplever: UUID,
node: str,
timestamp: datetime.datetime):
nodeid = self._get_node_id(sampleid, samplever, node)
def _create_link_doc(self, link: DataLink, samplever: UUID):
sna = link.sample_node_address
upa = link.duid.upa
nodeid = self._get_node_id(sna.sampleid, samplever, sna.node)
# see https://github.com/kbase/relation_engine_spec/blob/4a9dc6df2088763a9df88f0b018fa5c64f2935aa/schemas/ws/ws_object_version.yaml#L17 # noqa
from_ = f'{self._col_ws.name}/{duid.upa.wsid}:{duid.upa.objid}:{duid.upa.version}'
from_ = f'{self._col_ws.name}/{upa.wsid}:{upa.objid}:{upa.version}'
return {
_FLD_ARANGO_KEY: self._create_link_key(duid),
_FLD_ARANGO_KEY: self._create_link_key(link.duid),
_FLD_ARANGO_FROM: from_,
_FLD_ARANGO_TO: f'{self._col_nodes.name}/{nodeid}',
_FLD_LINK_CREATED: timestamp.timestamp(),
_FLD_LINK_EXPIRES: _MAX_ADB_INTEGER,
_FLD_LINK_WORKSPACE_ID: duid.upa.wsid,
_FLD_LINK_OBJECT_ID: duid.upa.objid,
_FLD_LINK_OBJECT_VERSION: duid.upa.version,
_FLD_LINK_OBJECT_DATA_UNIT: duid.dataid,
_FLD_LINK_SAMPLE_ID: str(sampleid),
_FLD_LINK_CREATED: link.create.timestamp(),
_FLD_LINK_EXPIRES: link.expire.timestamp() if link.expire else _MAX_ADB_INTEGER,
_FLD_LINK_WORKSPACE_ID: upa.wsid,
_FLD_LINK_OBJECT_ID: upa.objid,
_FLD_LINK_OBJECT_VERSION: upa.version,
_FLD_LINK_OBJECT_DATA_UNIT: link.duid.dataid,
_FLD_LINK_SAMPLE_ID: str(sna.sampleid),
_FLD_LINK_SAMPLE_VERSION: str(samplever),
_FLD_LINK_SAMPLE_NODE: node
_FLD_LINK_SAMPLE_NODE: sna.node
}


Expand Down
117 changes: 57 additions & 60 deletions test/core/storage/arango_sample_storage_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from core.test_utils import assert_exception_correct
from arango_controller import ArangoController
from SampleService.core.acls import SampleACL
from SampleService.core.data_link import DataLink
from SampleService.core.sample import SavedSample, SampleNode, SubSampleType, SampleNodeAddress
from SampleService.core.sample import SampleAddress
from SampleService.core.errors import MissingParameterError, NoSuchSampleError, ConcurrencyError
Expand Down Expand Up @@ -1073,31 +1074,32 @@ def test_ws_data_link(samplestorage):
assert samplestorage.save_sample(
SavedSample(id2, 'user', [SampleNode('mynode2')], dt(3), 'foo')) is True

samplestorage.link_workspace_data(
samplestorage.link_workspace_data(DataLink(
DataUnitID(UPA('5/89/32')),
SampleNodeAddress(SampleAddress(id1, 2), 'mynode1'),
dt(500)
dt(500))
)

# test different workspace object and different sample version
samplestorage.link_workspace_data(
samplestorage.link_workspace_data(DataLink(
DataUnitID(UPA('42/42/42'), 'dataunit1'),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(600)
dt(600))
)

# test data unit vs just UPA and different sample
samplestorage.link_workspace_data(
# test data unit vs just UPA, different sample, and expiration date
samplestorage.link_workspace_data(DataLink(
DataUnitID(UPA('5/89/32'), 'dataunit2'),
SampleNodeAddress(SampleAddress(id2, 1), 'mynode2'),
dt(700)
dt(700),
dt(30000))
)

# test data units don't collide if they have different names
samplestorage.link_workspace_data(
samplestorage.link_workspace_data(DataLink(
DataUnitID(UPA('5/89/32'), 'dataunit1'),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(800)
dt(800))
)

# this is naughty
Expand Down Expand Up @@ -1161,7 +1163,7 @@ def test_ws_data_link(samplestorage):
'sver': verdoc3['uuidver'],
'node': 'mynode2',
'create': 700,
'expire': 9007199254740991
'expire': 30000
}

link4 = samplestorage._col_data_link.get('5_89_32_bc7324de86d54718dd0dc29c55c6d53a')
Expand Down Expand Up @@ -1206,10 +1208,10 @@ def test_ws_data_link_correct_missing_versions(samplestorage):
samplestorage._col_version.update_match({}, {'ver': -1})
samplestorage._col_nodes.update_match({'name': 'kid2'}, {'ver': -1})

samplestorage.link_workspace_data(
samplestorage.link_workspace_data(DataLink(
DataUnitID(UPA('5/89/32')),
SampleNodeAddress(SampleAddress(id_, 1), 'kid1'),
dt(500)
dt(500))
)

assert samplestorage._col_version.count() == 1
Expand All @@ -1224,20 +1226,9 @@ def test_ws_data_link_correct_missing_versions(samplestorage):
assert v['ver'] == 1


def test_ws_data_link_bad_args(samplestorage):
ss = samplestorage
id1 = uuid.UUID('1234567890abcdef1234567890abcdef')
d = DataUnitID(UPA('1/1/1'))
s = SampleNodeAddress(SampleAddress(id1, 1), 'foo')
t = dt(1)
_ws_data_link_fail(ss, None, s, t, ValueError(
'duid cannot be a value that evaluates to false'))
_ws_data_link_fail(ss, d, None, t, ValueError(
'sample_node_address cannot be a value that evaluates to false'))
_ws_data_link_fail(ss, d, s, None, ValueError(
'timestamp cannot be a value that evaluates to false'))
_ws_data_link_fail(ss, d, s, datetime.datetime.now(), ValueError(
'timestamp cannot be a naive datetime'))
def test_ws_data_link_no_link(samplestorage):
_ws_data_link_fail(samplestorage, None, ValueError(
'link cannot be a value that evaluates to false'))


def test_ws_data_link_no_sample(samplestorage):
Expand All @@ -1248,9 +1239,10 @@ def test_ws_data_link_no_sample(samplestorage):

_ws_data_link_fail(
samplestorage,
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id2, 1), 'mynode'),
dt(1),
DataLink(
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id2, 1), 'mynode'),
dt(1)),
NoSuchSampleError(str(id2))
)

Expand All @@ -1264,9 +1256,10 @@ def test_ws_data_link_no_sample_version(samplestorage):

_ws_data_link_fail(
samplestorage,
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id1, 3), 'mynode'),
dt(1),
DataLink(
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id1, 3), 'mynode'),
dt(1)),
NoSuchSampleVersionError('12345678-90ab-cdef-1234-567890abcdef ver 3')
)

Expand All @@ -1276,31 +1269,33 @@ def test_ws_data_link_link_exists(samplestorage):
assert samplestorage.save_sample(
SavedSample(id1, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True

samplestorage.link_workspace_data(
samplestorage.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(500)
dt(500))
)

samplestorage.link_workspace_data(
samplestorage.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1'), 'du1'),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(500)
dt(500))
)

_ws_data_link_fail(
samplestorage,
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(1),
DataLink(
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(1)),
DataLinkExistsError('1/1/1')
)

_ws_data_link_fail(
samplestorage,
DataUnitID(UPA('1/1/1'), 'du1'),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(1),
DataLink(
DataUnitID(UPA('1/1/1'), 'du1'),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(1)),
DataLinkExistsError('1/1/1:du1')
)

Expand All @@ -1315,29 +1310,30 @@ def test_ws_data_link_too_many_links_from_ws_obj(samplestorage):
assert ss.save_sample(
SavedSample(id2, 'user', [SampleNode('mynode')], dt(1), 'foo')) is True

ss.link_workspace_data(
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(500)
dt(500))
)

ss.link_workspace_data(
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1'), '1'),
SampleNodeAddress(SampleAddress(id2, 1), 'mynode'),
dt(500)
dt(500))
)

ss.link_workspace_data(
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1'), '2'),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(500)
dt(500))
)

_ws_data_link_fail(
ss,
DataUnitID(UPA('1/1/1'), '3'),
SampleNodeAddress(SampleAddress(id2, 1), 'mynode'),
dt(1),
DataLink(
DataUnitID(UPA('1/1/1'), '3'),
SampleNodeAddress(SampleAddress(id2, 1), 'mynode'),
dt(1)),
TooManyDataLinksError('More than 3 links from workpace object 1/1/1')
)

Expand All @@ -1350,23 +1346,24 @@ def test_ws_data_link_too_many_links_from_sample_ver(samplestorage):
SavedSample(
id1, 'user', [SampleNode('mynode'), SampleNode('mynode2')], dt(1), 'foo')) is True

ss.link_workspace_data(
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/1')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(500)
dt(500))
)

ss.link_workspace_data(
ss.link_workspace_data(DataLink(
DataUnitID(UPA('1/1/2')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode2'),
dt(500)
dt(500))
)

_ws_data_link_fail(
ss,
DataUnitID(UPA('1/1/3')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(1),
DataLink(
DataUnitID(UPA('1/1/3')),
SampleNodeAddress(SampleAddress(id1, 1), 'mynode'),
dt(1)),
TooManyDataLinksError(
'More than 2 links from sample 12345678-90ab-cdef-1234-567890abcdef version 1')
)
Expand All @@ -1387,7 +1384,7 @@ def _samplestorage_with_max_links(samplestorage, max_links):
max_links=max_links)


def _ws_data_link_fail(samplestorage, duid, sna, ts, expected):
def _ws_data_link_fail(samplestorage, link, expected):
with raises(Exception) as got:
samplestorage.link_workspace_data(duid, sna, ts)
samplestorage.link_workspace_data(link)
assert_exception_correct(got.value, expected)