
Commit

Scottx611x/update node index preparation (#2524)
* Skip indexing of Nodes if they correspond to a non-exposed output AnalysisNodeConnection

* Add test coverage

* Rename the `object` argument (which masks a builtin) to `node`, since that's what we're preparing for indexing

* Impose these checks in a single `get()` query

* Tidy styling

* Rework tests to be easier to read, with a single assertion each
scottx611x committed Jan 22, 2018
1 parent 3cb1814 commit c8a9cb3
Showing 2 changed files with 117 additions and 44 deletions.
57 changes: 38 additions & 19 deletions refinery/data_set_manager/search_indexes.py
@@ -15,6 +15,7 @@
from haystack import indexes
from haystack.exceptions import SkipDocument

import core
from file_store.models import FileStoreItem

from .models import AnnotatedNode, Assay, Node
@@ -71,32 +72,50 @@ def _assay_data(self, object):
data[key].add(assay_attr)
return data

@staticmethod
def _check_skip_indexing_conditions(node):
if node.type not in Node.INDEXED_FILES:
raise SkipDocument()

try:
core.models.AnalysisNodeConnection.objects.get(
node=node,
is_refinery_file=False,
direction=core.models.OUTPUT_CONNECTION
)
except (core.models.AnalysisNodeConnection.DoesNotExist,
core.models.AnalysisNodeConnection.MultipleObjectsReturned):
# Not all Nodes will have an AnalysisNodeConnection
# and that's okay
pass
else:
raise SkipDocument()
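# Note: the try/except/else above skips a Node only when the .get() finds
# exactly one matching connection; both DoesNotExist and
# MultipleObjectsReturned fall through to the "keep indexing" path.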

# dynamic fields:
# https://groups.google.com/forum/?fromgroups#!topic/django-haystack/g39QjTkN-Yg
# http://stackoverflow.com/questions/7399871/django-haystack-sort-results-by-title
def prepare(self, object):
if object.type not in Node.INDEXED_FILES:
raise SkipDocument()
def prepare(self, node):
self._check_skip_indexing_conditions(node)

data = super(NodeIndex, self).prepare(object)
annotations = AnnotatedNode.objects.filter(node=object)
id_suffix = str(object.study.id)
data = super(NodeIndex, self).prepare(node)
annotations = AnnotatedNode.objects.filter(node=node)
id_suffix = str(node.study.id)

try:
data_set = object.study.get_dataset()
data_set = node.study.get_dataset()
data['data_set_uuid'] = data_set.uuid
except RuntimeError as e:
logger.warn(e)

if object.assay is not None:
id_suffix += "_" + str(object.assay.id)
if node.assay is not None:
id_suffix += "_" + str(node.assay.id)

id_suffix = "_" + id_suffix + "_s"

data['filename_Characteristics' + NodeIndex.GENERIC_SUFFIX] = \
re.sub(r'.*/', '', data['name'])

data.update(self._assay_data(object))
data.update(self._assay_data(node))

# create dynamic fields for each attribute
for annotation in annotations:
@@ -137,7 +156,7 @@ def prepare(self, object):

try:
file_store_item = FileStoreItem.objects.get(
uuid=object.file_uuid
uuid=node.file_uuid
)
except(FileStoreItem.DoesNotExist,
FileStoreItem.MultipleObjectsReturned) as e:
Expand Down Expand Up @@ -185,21 +204,21 @@ def prepare(self, object):
NodeIndex.DOWNLOAD_URL:
download_url,
NodeIndex.TYPE_PREFIX + id_suffix:
object.type,
node.type,
NodeIndex.NAME_PREFIX + id_suffix:
object.name,
node.name,
NodeIndex.FILETYPE_PREFIX + id_suffix:
"" if file_store_item is None
else file_store_item.get_filetype(),
NodeIndex.ANALYSIS_UUID_PREFIX + id_suffix:
NOT_AVAILABLE if object.get_analysis() is None
else object.get_analysis().name,
NOT_AVAILABLE if node.get_analysis() is None
else node.get_analysis().name,
NodeIndex.SUBANALYSIS_PREFIX + id_suffix:
(-1 if object.subanalysis is None # TODO: upgrade flake8
else object.subanalysis), # and remove parentheses
(-1 if node.subanalysis is None # TODO: upgrade flake8
else node.subanalysis), # and remove parentheses
NodeIndex.WORKFLOW_OUTPUT_PREFIX + id_suffix:
NOT_AVAILABLE if object.workflow_output is None
else object.workflow_output
NOT_AVAILABLE if node.workflow_output is None
else node.workflow_output
})

return data
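
As a quick orientation for the diff above, here is a minimal sketch of how a caller experiences the new guard (hedged: `prepare_for_indexing` and `some_node` are hypothetical names, and the project's usual Django/Haystack setup is assumed):

from haystack.exceptions import SkipDocument

from data_set_manager.search_indexes import NodeIndex

def prepare_for_indexing(some_node):
    # Return the prepared index payload, or None when the node is skipped:
    # either its type is not in Node.INDEXED_FILES, or it is the output of
    # an AnalysisNodeConnection whose result was not exposed
    # (is_refinery_file=False).
    try:
        return NodeIndex().prepare(some_node)
    except SkipDocument:
        return None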
104 changes: 79 additions & 25 deletions refinery/data_set_manager/tests.py
@@ -26,7 +26,9 @@
from mock import ANY
from rest_framework.test import APIClient, APIRequestFactory, APITestCase

from core.models import Analysis, DataSet, ExtendedGroup, InvestigationLink
from core.models import (INPUT_CONNECTION, OUTPUT_CONNECTION, Analysis,
AnalysisNodeConnection, DataSet, ExtendedGroup,
InvestigationLink)
from core.tests import TestMigrations
from core.views import NodeViewSet
import data_set_manager
@@ -1937,9 +1939,7 @@ def test_skip_types(self):
with self.assertRaises(SkipDocument):
NodeIndex().prepare(self.node)

def _assert_valid_node_index_preparation(self,
node, expected_download_url=None,
expected_filetype=None):
def _prepare_node_index(self, node):
data = NodeIndex().prepare(node)
data = dict(
(
Expand All @@ -1952,8 +1952,14 @@ def _assert_valid_node_index_preparation(self,
)
for (key, value) in data.items()
)
return data

def _assert_node_index_prepared_correctly(self,
data_to_be_indexed,
expected_download_url=None,
expected_filetype=None):
self.assertEqual(
data,
data_to_be_indexed,
{
'REFINERY_ANALYSIS_UUID_#_#_s': 'N/A',
'REFINERY_DOWNLOAD_URL_s': expected_download_url,
Expand Down Expand Up @@ -1989,15 +1995,15 @@ def _assert_valid_node_index_preparation(self,
'workflow_output': None
}
)
return data

def test_prepare_node_good_datafile(self):
index_data = self._assert_valid_node_index_preparation(
self.node,
data_to_be_indexed = self._prepare_node_index(self.node)
self._assert_node_index_prepared_correctly(
data_to_be_indexed,
expected_download_url=self.file_store_item.get_datafile_url()
)
self.assertRegexpMatches(
index_data['REFINERY_DOWNLOAD_URL_s'],
data_to_be_indexed['REFINERY_DOWNLOAD_URL_s'],
r'^http://example.com/media/file_store/.+/test_file.+txt$'
# There may or may not be a suffix on "test_file",
# depending on environment. Don't make it too strict!
@@ -2007,8 +2013,8 @@ def test_prepare_node_remote_datafile_source(self):
self.file_store_item.datafile.delete()
self.file_store_item.source = "http://www.example.com/test.txt"
self.file_store_item.save()
self._assert_valid_node_index_preparation(
self.node,
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=self.file_store_item.source
)

@@ -2020,8 +2026,8 @@ def test_prepare_node_missing_datafile(self):
"get_import_status",
return_value=SUCCESS
) as get_import_status_mock:
self._assert_valid_node_index_preparation(
self.node,
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=NOT_AVAILABLE
)
self.assertTrue(get_import_status_mock.called)
@@ -2034,16 +2040,16 @@ def test_prepare_node_pending_yet_existing_file_import_task(self):
"get_import_status",
return_value=PENDING
):
self._assert_valid_node_index_preparation(
self.node,
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=PENDING
)

def test_prepare_node_pending_non_existent_file_import_task(self):
self.import_task.delete()
self.file_store_item.datafile.delete()
self._assert_valid_node_index_preparation(
self.node,
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=NOT_AVAILABLE
)

@@ -2052,15 +2058,15 @@ def test_prepare_node_no_file_import_task_id_yet(self):
self.file_store_item.import_task_id = ""
self.file_store_item.save()
self.import_task.delete()
self._assert_valid_node_index_preparation(
self.node,
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=PENDING
)

def test_prepare_node_no_file_store_item(self):
self.file_store_item.delete()
self._assert_valid_node_index_preparation(
self.node,
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=NOT_AVAILABLE,
expected_filetype=""
)
@@ -2074,16 +2080,64 @@ def test_prepare_node_s3_file_store_item_source_no_datafile(self):
"get_import_status",
return_value=SUCCESS
):
self._assert_valid_node_index_preparation(
self.node,
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=NOT_AVAILABLE
)

def test_prepare_node_s3_file_store_item_source_with_datafile(self):
self.file_store_item.source = "s3://test/test.txt"
self.file_store_item.save()
self._assert_valid_node_index_preparation(
self.node,
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=self.file_store_item.get_datafile_url()
)

def _create_analysis_node_connection(self, direction, is_refinery_file):
user = User.objects.create_user("test", "", "test")
make_analyses_with_single_dataset(1, user)

AnalysisNodeConnection.objects.create(
analysis=Analysis.objects.first(),
node=self.node,
direction=direction,
step=1,
name="{} Analysis Node Connection".format(direction),
filename="test.txt",
is_refinery_file=is_refinery_file
)

def test_prepare_node_with_non_exposed_input_node_connection_isnt_skipped(
self
):
self._create_analysis_node_connection(INPUT_CONNECTION, False)
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=self.file_store_item.get_datafile_url()
)

def test_prepare_node_with_exposed_input_node_connection_isnt_skipped(
self
):
self._create_analysis_node_connection(INPUT_CONNECTION, True)
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=self.file_store_item.get_datafile_url()
)

def test_prepare_node_with_non_exposed_output_node_connection_is_skipped(
self
):
self._create_analysis_node_connection(OUTPUT_CONNECTION, False)
with self.assertRaises(SkipDocument):
self._prepare_node_index(self.node)

def test_prepare_node_with_exposed_output_node_connection_isnt_skipped(
self
):
self._create_analysis_node_connection(OUTPUT_CONNECTION, True)
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=self.file_store_item.get_datafile_url()
)

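Read together, the four new connection tests pin down the indexing rule. A compact restatement as a sketch (the direction constants are the `core.models` values imported in the tests above):

EXPECTED_TO_BE_INDEXED = {
    # (direction, is_refinery_file): is the Node prepared for indexing?
    (INPUT_CONNECTION, False): True,
    (INPUT_CONNECTION, True): True,
    (OUTPUT_CONNECTION, False): False,  # the only case raising SkipDocument
    (OUTPUT_CONNECTION, True): True,
}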