File import bug fixes #2723

Merged 21 commits on Apr 19, 2018

Commits
a00d8ca
Terminate file import tasks on FileStoreItem deletion and code cleanup
hackdna Mar 20, 2018
697c994
Merge branch 'develop' into hackdna/terminate-file-import
hackdna Mar 22, 2018
95a6330
Attempt to cancel file import on every file deletion, code cleanup
hackdna Mar 22, 2018
ced0c65
Stop import_file task only when task ID is available and update unit …
hackdna Mar 23, 2018
7839c05
Add unit tests for file import task termination on file delete
hackdna Mar 23, 2018
cd5083f
Avoid canceling file import when not replacing the datafile, add unit…
hackdna Mar 26, 2018
b400d1b
Add unit tests for data file symlinking and test cleanup
hackdna Mar 27, 2018
cd104bd
PEP8 fix and test code cleanup
hackdna Mar 27, 2018
b59fb9e
Test code cleanup
hackdna Mar 28, 2018
34088dd
Remove datafile and import_file task updates from FileStoreItem.save(…
hackdna Mar 30, 2018
edc1055
Cleanup update_solr_index()
hackdna Apr 2, 2018
325b08f
Merge branch 'develop' into hackdna/terminate-file-import
hackdna Apr 6, 2018
8d57457
Improve logging in delete_datafile() and remove commented out code fr…
hackdna Apr 9, 2018
d9e83a1
Move files from file store temp into the file store dir during import
hackdna Apr 9, 2018
43fe610
Make _mkdir() more robust and update import_file to avoid removing em…
hackdna Apr 9, 2018
de2a0f5
Improve logging in Celery tasks
hackdna Apr 10, 2018
b67101b
Improve logging in CheckDataFilesView
hackdna Apr 17, 2018
8b9cf88
Refactor import_file task to fail in case of errors and make sure fil…
hackdna Apr 17, 2018
38b2976
Merge branch 'develop' into hackdna/terminate-file-import
hackdna Apr 18, 2018
2c05fd5
Improve directory making logic
hackdna Apr 18, 2018
4b985f4
Code cleanup
hackdna Apr 18, 2018
5 changes: 5 additions & 0 deletions refinery/config/settings/base.py
@@ -317,6 +317,11 @@ def get_setting(name, settings=local_settings, default=None):

EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend"

# for external functions called in Celery tasks
CELERYD_LOG_FORMAT = '%(asctime)s %(levelname)-8s %(name)s:%(lineno)s ' \
'%(funcName)s() - %(message)s'
CELERYD_TASK_LOG_FORMAT = '%(asctime)s %(levelname)-8s %(name)s:%(lineno)s ' \
'%(funcName)s[%(task_id)s] - %(message)s'
# for system stability
CELERYD_MAX_TASKS_PER_CHILD = get_setting("CELERYD_MAX_TASKS_PER_CHILD")
CELERY_ROUTES = {"file_store.tasks.import_file": {"queue": "file_import"}}
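For illustration, a minimal sketch (outside of Celery, using the standard logging module with a hypothetical logger name and function) of what log lines produced with the CELERYD_LOG_FORMAT above look like:

import logging

# apply the same format string to a plain logging handler to preview the output
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(
    '%(asctime)s %(levelname)-8s %(name)s:%(lineno)s %(funcName)s() - %(message)s'
))
logger = logging.getLogger('file_store.utils')  # hypothetical logger name
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

def copy_file():
    # prints something like:
    # 2018-04-18 14:05:03,123 INFO     file_store.utils:15 copy_file() - copying data file
    logger.info("copying data file")

copy_file()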
9 changes: 1 addition & 8 deletions refinery/core/models.py
@@ -799,19 +799,12 @@ def _dataset_delete(sender, instance, *args, **kwargs):
See: https://docs.djangoproject.com/en/1.8/topics/db/models/
#overriding-model-methods
"""

# terminate any running file import tasks
for file_store_item in instance.get_file_store_items():
file_store_item.terminate_file_import_task()

related_investigation_links = instance.get_investigation_links()

with transaction.atomic():
# delete FileStoreItem and datafile corresponding to the
# metadata file used to generate the DataSet
instance.get_metadata_as_file_store_item().delete()

for investigation_link in related_investigation_links:
for investigation_link in instance.get_investigation_links():
investigation_link.get_node_collection().delete()

delete_data_set_index(instance)
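The terminate-on-delete behavior now lives on FileStoreItem (see commit a00d8ca) and its implementation is not part of this diff, so the following is only a hedged sketch of what terminating a running import task looks like with the Celery API that the updated tests patch (celery.result.AsyncResult):

from celery.result import AsyncResult

def terminate_file_import_task(import_task_id):
    # hypothetical standalone version; the real method is defined on FileStoreItem
    if import_task_id:
        # revoke the task, terminating it if it is already running
        AsyncResult(import_task_id).revoke(terminate=True)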
3 changes: 2 additions & 1 deletion refinery/core/tests.py
@@ -737,7 +737,8 @@ def test_analysis_deletion_removes_related_objects(self):
def test_analysis_bulk_deletion_removes_related_objects(self):
# make a second Analysis
make_analyses_with_single_dataset(1, self.user)
Analysis.objects.all().delete()
with mock.patch('celery.result.AsyncResult'):
Analysis.objects.all().delete()

self.assertEqual(Analysis.objects.count(), 0)
self.assertEqual(AnalysisNodeConnection.objects.count(), 0)
63 changes: 15 additions & 48 deletions refinery/data_set_manager/search_indexes.py
@@ -9,12 +9,11 @@

from django.conf import settings

from celery.states import PENDING, SUCCESS
from constants import NOT_AVAILABLE
from djcelery.models import TaskMeta
import celery
from haystack import indexes
from haystack.exceptions import SkipDocument

import constants
import core
from file_store.models import FileStoreItem

@@ -118,46 +117,17 @@ def prepare(self, node):
FileStoreItem.MultipleObjectsReturned) as e:
logger.error("Couldn't properly fetch FileStoreItem: %s", e)
file_store_item = None
download_url = NOT_AVAILABLE
download_url_or_state = constants.NOT_AVAILABLE
data['filetype_Characteristics' + NodeIndex.GENERIC_SUFFIX] = ''
else:
data['filetype_Characteristics' + NodeIndex.GENERIC_SUFFIX] = \
file_store_item.filetype
download_url = file_store_item.get_datafile_url()
if download_url is None:
if not file_store_item.import_task_id:
logger.debug("No import_task_id yet for FileStoreItem "
"with UUID: %s", file_store_item.uuid)
download_url = PENDING
else:
logger.debug(
"FileStoreItem with UUID: %s has import_task_id: %s",
file_store_item.uuid,
file_store_item.import_task_id
)
if file_store_item.get_import_status() == SUCCESS:
download_url = NOT_AVAILABLE
else:
# The underlying Celery code in
# FileStoreItem.get_import_status() makes an assumption
# that a result is "probably" PENDING even if it can't
# find an associated Task. See:
# https://github.com/celery/celery/blob/v3.1.20/celery/
# backends/amqp.py#L192-L193 So we double check here to
# make sure said assumption holds up
try:
TaskMeta.objects.get(
task_id=file_store_item.import_task_id
)
except TaskMeta.DoesNotExist:
logger.debug(
"No file_import task for FileStoreItem with "
"UUID: %s",
file_store_item.uuid
)
download_url = NOT_AVAILABLE
else:
download_url = PENDING
download_url_or_state = file_store_item.get_datafile_url()
Review comment (Member):
Can we just call this download_url and below use import_state/import_status? It seems that get_datafile_url() won't return a FileStoreItem's import state, and get_import_status() won't return download url information.

Reply from @hackdna (Member, Author), Apr 18, 2018:
Yes, this would be great. However, it would require changing the front-end code to deal with file import state and download URL separately. So, I simplified the logic as much as I could without going too far down the rabbit hole and renamed this variable to reflect its actual current use.

if download_url_or_state is None:
download_url_or_state = file_store_item.get_import_status()
# UI can not handle FAILURE state
if download_url_or_state == celery.states.FAILURE:
download_url_or_state = constants.NOT_AVAILABLE

data.update(self._assay_data(node))

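To summarize the change discussed in the review comment above, a minimal standalone sketch (not the actual method) of how the indexed DOWNLOAD_URL value is now resolved: it carries either a real datafile URL, a Celery import state such as PENDING or STARTED, or N/A when the import has failed.

from celery import states

NOT_AVAILABLE = 'N/A'  # stand-in for refinery's constants.NOT_AVAILABLE

def resolve_download_url_or_state(file_store_item):
    value = file_store_item.get_datafile_url()
    if value is None:
        # no URL yet: fall back to the import task state
        value = file_store_item.get_import_status()
        if value == states.FAILURE:  # the UI cannot display FAILURE
            value = NOT_AVAILABLE
    return value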
@@ -191,31 +161,28 @@ def prepare(self, node):
if value != "":
data[key].add(value)
else:
data[key].add(NOT_AVAILABLE)
data[key].add(constants.NOT_AVAILABLE)

# iterate over all keys in data and join sets into strings
for key, value in data.iteritems():
if type(value) is set:
data[key] = " + ".join(sorted(value))

data.update({
NodeIndex.DOWNLOAD_URL:
download_url,
NodeIndex.TYPE_PREFIX + id_suffix:
node.type,
NodeIndex.NAME_PREFIX + id_suffix:
node.name,
NodeIndex.DOWNLOAD_URL: download_url_or_state,
NodeIndex.TYPE_PREFIX + id_suffix: node.type,
NodeIndex.NAME_PREFIX + id_suffix: node.name,
NodeIndex.FILETYPE_PREFIX + id_suffix:
"" if file_store_item is None
else file_store_item.filetype,
NodeIndex.ANALYSIS_UUID_PREFIX + id_suffix:
NOT_AVAILABLE if node.get_analysis() is None
constants.NOT_AVAILABLE if node.get_analysis() is None
else node.get_analysis().name,
NodeIndex.SUBANALYSIS_PREFIX + id_suffix:
(-1 if node.subanalysis is None # TODO: upgrade flake8
else node.subanalysis), # and remove parentheses
NodeIndex.WORKFLOW_OUTPUT_PREFIX + id_suffix:
NOT_AVAILABLE if node.workflow_output is None
constants.NOT_AVAILABLE if node.workflow_output is None
else node.workflow_output
})

29 changes: 15 additions & 14 deletions refinery/data_set_manager/tests.py
@@ -17,15 +17,15 @@
from django.http import QueryDict
from django.test import LiveServerTestCase, TestCase

from celery.states import PENDING, STARTED, SUCCESS
from constants import NOT_AVAILABLE
from celery.states import FAILURE, PENDING, STARTED, SUCCESS
from djcelery.models import TaskMeta
from guardian.shortcuts import assign_perm
from haystack.exceptions import SkipDocument
import mock
from mock import ANY
from rest_framework.test import APIClient, APIRequestFactory, APITestCase

import constants
from core.models import (INPUT_CONNECTION, OUTPUT_CONNECTION, Analysis,
AnalysisNodeConnection, DataSet, ExtendedGroup,
InvestigationLink)
@@ -1913,9 +1913,6 @@ def setUp(self):

self.maxDiff = None

def tearDown(self):
FileStoreItem.objects.all().delete()

def test_skip_types(self):
self.node.type = 'Unknown File Type'
with self.assertRaises(SkipDocument):
@@ -2007,34 +2004,38 @@ def test_prepare_node_pending_non_existent_file_import_task(self):
self.import_task.delete()
with mock.patch.object(FileStoreItem, 'get_datafile_url',
return_value=None):
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=NOT_AVAILABLE
)
with mock.patch.object(FileStoreItem, 'get_import_status',
return_value=FAILURE):
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=constants.NOT_AVAILABLE
)

def test_prepare_node_no_file_import_task_id_yet(self):
self.file_store_item.import_task_id = ""
self.file_store_item.save()
self.import_task.delete()
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node), expected_download_url=PENDING
self._prepare_node_index(self.node),
expected_download_url=constants.NOT_AVAILABLE
)

def test_prepare_node_no_file_store_item(self):
self.file_store_item.delete()
with mock.patch('celery.result.AsyncResult'):
self.file_store_item.delete()
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=NOT_AVAILABLE, expected_filetype=''
expected_download_url=constants.NOT_AVAILABLE, expected_filetype=''
)

def test_prepare_node_s3_file_store_item_source_no_datafile(self):
self.file_store_item.source = 's3://test/test.txt'
self.file_store_item.save()
with mock.patch.object(FileStoreItem, 'get_import_status',
return_value=SUCCESS):
return_value=FAILURE):
self._assert_node_index_prepared_correctly(
self._prepare_node_index(self.node),
expected_download_url=NOT_AVAILABLE,
expected_download_url=constants.NOT_AVAILABLE,
expected_filetype=self.file_store_item.filetype
)

20 changes: 11 additions & 9 deletions refinery/data_set_manager/views.py
@@ -329,14 +329,6 @@ def post(self, request, *args, **kwargs):
else:
dataset_uuid = parse_isatab_invocation

try:
os.unlink(response['data']['temp_file_path'])
except OSError as e:
logger.error(
"Couldn't unlink temporary file: %s %s",
response['data']['temp_file_path'], e
)

# import data files
if dataset_uuid:
try:
@@ -585,23 +577,33 @@ def post(self, request, *args, **kwargs):
# get a list of all uploaded S3 objects for the user
s3 = boto3.resource('s3')
s3_bucket = s3.Bucket(settings.UPLOAD_BUCKET)
# TODO: handle ParamValidationError (return error msg in response?)
for s3_object in s3_bucket.objects.filter(Prefix=identity_id):
uploaded_s3_key_list.append(s3_object.key)

for input_file_path in input_file_list:
if not isinstance(input_file_path, unicode):
bad_file_list.append(input_file_path)
logger.error("Uploaded file path '%s' is not a string",
input_file_path)
else:
input_file_path = translate_file_source(input_file_path)
if settings.REFINERY_DEPLOYMENT_PLATFORM == 'aws':
# check if S3 object key exists
bucket_name, key = parse_s3_url(input_file_path)
if key not in uploaded_s3_key_list:
bad_file_list.append(os.path.basename(key))
logger.debug("Object key '%s' does not exist in '%s'",
key, bucket_name)
else:
logger.debug("Object key '%s' exists in '%s'",
key, bucket_name)
else: # POSIX file system
if not os.path.exists(input_file_path):
bad_file_list.append(input_file_path)
logger.debug("Checked file path: '%s'", input_file_path)
logger.debug("File '%s' does not exist")
else:
logger.debug("File '%s' exists")

# prefix output to protect from JSON vulnerability (stripped by
# Angular)
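For context, a minimal sketch (with hypothetical bucket and identity values) of the S3 existence-check pattern used above: the view lists the user's uploaded objects once by prefix and then tests each parsed key against that collection, rather than issuing a separate HEAD request per file.

import boto3

s3 = boto3.resource('s3')
bucket = s3.Bucket('refinery-upload')  # hypothetical UPLOAD_BUCKET value
identity_id = 'us-east-1:example-identity'  # hypothetical Cognito identity ID

# one LIST request per check instead of one HEAD request per file;
# a set makes the per-key membership tests fast
uploaded_keys = {obj.key for obj in bucket.objects.filter(Prefix=identity_id)}

def is_uploaded(key):
    # membership test against the keys collected above
    return key in uploaded_keys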