Mccalluc/add fields to datasetmanager index (#1824)
* Add "generic" facets

* The RE is slightly more general than the old list of replaced characters, but that is probably what we want

* May be gratuitous, but I think this makes it clearer that we begin from a stem and add modifiers

* Checkpoint: New test fails, looking for a FileStoreItem. (The error is swallowed and the method returns None, which doesn't feel like the right behavior.)

* Test passes, but TODO: add attributes that will exercise the new logic

* Remap keys, as they are not stable

* Remap values, too; add tearDown

* ... but do not remap uuids

* Checkpoint: This should be adding an attribute... but the test still passes with no other changes, so not quite right
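A minimal sketch of the field-naming scheme these commits introduce, assuming REFINERY_SOLR_SPACE_DYNAMIC_FIELDS is "_" and using an invented attribute (the real logic is in the search_indexes.py hunk below): each annotated attribute now yields both a per-study/assay key and a shared "generic" key.

    import re

    SEPARATOR = '_'  # stand-in for settings.REFINERY_SOLR_SPACE_DYNAMIC_FIELDS

    name = 'Characteristics'               # stem: annotation.attribute_type
    name = 'cell type' + '_' + name        # modifier: annotation.attribute_subtype (invented)
    name = re.sub(r'\W', SEPARATOR, name)  # -> 'cell_type_Characteristics'

    uuid = '12' + '_' + '34'               # study.id + '_' + assay.id (made up)
    uniq_key = name + '_' + uuid + '_s'    # 'cell_type_Characteristics_12_34_s'
    generic_key = name + '_generic_s'      # 'cell_type_Characteristics_generic_s'

Values for both keys are accumulated in a set and joined with " + ", so a node with several parents keeps every distinct value.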
mccalluc committed Jun 26, 2017
1 parent 77597a4 commit fb6d99b
Showing 2 changed files with 110 additions and 44 deletions.
61 changes: 25 additions & 36 deletions refinery/data_set_manager/search_indexes.py
@@ -5,15 +5,15 @@
'''

import logging
import string
import re

from django.conf import settings

from haystack import indexes

from .models import AnnotatedNode, Node
from file_store.models import FileStoreItem

from .models import AnnotatedNode, Node

logger = logging.getLogger(__name__)


@@ -60,33 +60,20 @@ def prepare(self, object):
uuid += "_" + str(object.assay.id)
# create dynamic fields for each attribute
for annotation in annotations:
if annotation.attribute_subtype is None:
name = annotation.attribute_type
else:
name = annotation.attribute_subtype + "_" + \
annotation.attribute_type
if annotation.attribute_value_unit is None:
value = annotation.attribute_value
else:
value = annotation.attribute_value + " " + \
annotation.attribute_value_unit
# replace problematic characters in field names
name = string.replace(name, "/",
settings.REFINERY_SOLR_SPACE_DYNAMIC_FIELDS)
name = string.replace(name, "(",
settings.REFINERY_SOLR_SPACE_DYNAMIC_FIELDS)
name = string.replace(name, ")",
settings.REFINERY_SOLR_SPACE_DYNAMIC_FIELDS)
name = string.replace(name, "#",
settings.REFINERY_SOLR_SPACE_DYNAMIC_FIELDS)
name = string.replace(name, ",",
settings.REFINERY_SOLR_SPACE_DYNAMIC_FIELDS)
name = string.replace(name, " ",
settings.REFINERY_SOLR_SPACE_DYNAMIC_FIELDS)
name = string.replace(name, "'",
settings.REFINERY_SOLR_SPACE_DYNAMIC_FIELDS)

key = name + "_" + uuid + "_s"
name = annotation.attribute_type
if annotation.attribute_subtype is not None:
name = annotation.attribute_subtype + "_" + name

value = annotation.attribute_value
if annotation.attribute_value_unit is not None:
value += " " + annotation.attribute_value_unit

name = re.sub(r'\W',
settings.REFINERY_SOLR_SPACE_DYNAMIC_FIELDS,
name)

uniq_key = name + "_" + uuid + "_s"
generic_key = name + "_generic_s"
# a node might have multiple parents with different attribute
# values for a given attribute
# e.g. parentA Characteristic[cell type] = K562 and
@@ -95,13 +82,15 @@
# concatenation of the unique list
# old version (only one attribute kept):
# data[key] = value
if key not in data:
data[key] = set()
if value != "":
data[key].add(value)
else:
data[key].add("N/A")
for key in (uniq_key, generic_key):
if key not in data:
data[key] = set()
if value != "":
data[key].add(value)
else:
data[key].add("N/A")
# iterate over all keys in data and join sets into strings
# TODO: This doesn't feel right: facet each separately?
for key, value in data.iteritems():
if type(value) is set:
data[key] = " + ".join(value)
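As an aside on the sanitization change above: the new re.sub call is broader than the chain of string.replace calls it removes, which is what the commit message means by "slightly more general". A small comparison, assuming REFINERY_SOLR_SPACE_DYNAMIC_FIELDS is "_" and using an invented name:

    import re

    SEPARATOR = '_'            # assumed value of settings.REFINERY_SOLR_SPACE_DYNAMIC_FIELDS
    OLD_CHARS = "/()#, '"      # the seven characters the removed string.replace calls handled

    name = 'Factor Value[dose/level]'   # invented attribute name

    old_style = name
    for char in OLD_CHARS:
        old_style = old_style.replace(char, SEPARATOR)
    new_style = re.sub(r'\W', SEPARATOR, name)

    print(old_style)  # 'Factor_Value[dose_level]' -- brackets are left alone
    print(new_style)  # 'Factor_Value_dose_level_' -- every non-word character is replaced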
93 changes: 85 additions & 8 deletions refinery/data_set_manager/tests.py
@@ -1,18 +1,21 @@
import json
from StringIO import StringIO

import re
from StringIO import StringIO

from django.contrib.auth.models import User
from django.core.files.uploadedfile import (InMemoryUploadedFile,
SimpleUploadedFile)

from django.http import QueryDict
from django.test import TestCase

from rest_framework.test import APIClient, APIRequestFactory, APITestCase

from .models import Assay, AttributeOrder, Study, Investigation, Node
from core.models import DataSet, ExtendedGroup, InvestigationLink
from core.views import NodeViewSet
from file_store.models import FileStoreItem

from .models import (Assay, Attribute, AttributeOrder, Investigation, Node,
Study)
from .search_indexes import NodeIndex
from .serializers import AttributeOrderSerializer
from .utils import (create_facet_filter_query, customize_attribute_response,
escape_character_solr, format_solr_response,
@@ -24,9 +27,6 @@
insert_facet_field_filter, is_field_in_hidden_list,
objectify_facet_field_counts, update_attribute_order_ranks)
from .views import Assays, AssaysAttributes
from core.models import DataSet, ExtendedGroup, InvestigationLink
from core.views import NodeViewSet
from file_store.models import FileStoreItem


class AssaysAPITests(APITestCase):
@@ -1572,3 +1572,80 @@ def test_get_basic_node(self):
self.assertTrue('subanalysis' in self.get_response.data[0])
self.assertTrue('type' in self.get_response.data[0])
self.assertTrue('uuid' in self.get_response.data[0])


class NodeIndexTests(APITestCase):

def setUp(self):
investigation = Investigation.objects.create()
study = Study.objects.create(investigation=investigation)
assay = Assay.objects.create(study=study)

test_file = StringIO()
test_file.write('Coffee is great.\n')
file_store_item = FileStoreItem.objects.create(
datafile=InMemoryUploadedFile(
test_file,
field_name='tempfile',
name='test_file.txt',
content_type='text/plain',
size=len(test_file.getvalue()),
charset='utf-8'
)
)

self.node = Node.objects.create(
assay=assay,
study=study,
file_uuid=file_store_item.uuid)

self.assay_uuid = assay.uuid
self.study_uuid = study.uuid
self.file_uuid = file_store_item.uuid
self.node_uuid = self.node.uuid

Attribute.objects.create(
node=self.node,
type=Attribute.CHARACTERISTICS,
subtype='fake subtype',
value='fake value'
)

self.maxDiff = None

def tearDown(self):
FileStoreItem.objects.all().delete()

def test_prepare(self):
data = NodeIndex().prepare(self.node)
data = dict(
(
re.sub(r'\d+', '#', k),
re.sub(r'\d+', '#', v) if
type(v) in (unicode, str) and not('uuid' in k)
else v
)
for (k, v) in data.items())
self.assertEqual(data,
{'REFINERY_ANALYSIS_UUID_#_#_s': 'N/A',
'REFINERY_FILETYPE_#_#_s': None,
'REFINERY_NAME_#_#_s': u'',
'REFINERY_SUBANALYSIS_#_#_s': -1,
'REFINERY_TYPE_#_#_s': u'',
'REFINERY_WORKFLOW_OUTPUT_#_#_s': 'N/A',
'analysis_uuid': None,
'assay_uuid': self.assay_uuid,
u'django_ct': u'data_set_manager.node',
u'django_id': u'#',
'file_uuid': self.file_uuid,
'genome_build': None,
u'id': u'data_set_manager.node.#',
'is_annotation': False,
'name': u'',
'species': None,
'study_uuid': self.study_uuid,
'subanalysis': None,
'text': u'',
'type': u'',
'uuid': self.node_uuid,
'workflow_output': None})
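The digit remapping in test_prepare above exists because database ids baked into the dynamic field names (and into django_id) are not stable between runs. Keys are always normalized; values are only normalized when they are strings and the key does not mention 'uuid', so real identifiers still match self.assay_uuid and friends. A tiny worked example with a hypothetical key:

    import re

    key = 'REFINERY_ANALYSIS_UUID_7_13_s'   # raw key embedding study and assay ids (made up)
    print(re.sub(r'\d+', '#', key))         # prints 'REFINERY_ANALYSIS_UUID_#_#_s'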
