Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restructure Analysis Metadata Fetching for Celery #3461

Merged
merged 36 commits into from
Oct 22, 2019
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
56acbe7
cut down api requests by passing in result
ilan-gold Oct 4, 2019
c22bd0f
import generate_auxiliary_file to circumvent potential import issue
ilan-gold Oct 4, 2019
0b20f00
remove reference to dsm.tasks
ilan-gold Oct 4, 2019
edb457a
store a tool's response information in a django field for repeated use
ilan-gold Oct 7, 2019
64cfb41
re-format if statement
ilan-gold Oct 7, 2019
e87fa27
allow request to be forced through instead of loading from database
ilan-gold Oct 8, 2019
7d4dce9
use ast.literal_eval for lists instead of json.loads
ilan-gold Oct 8, 2019
547213d
further cache response to invocation call and refactor other cache
ilan-gold Oct 8, 2019
fd824af
reformat and delete unused imports
ilan-gold Oct 8, 2019
a895dad
revert data_set_manager import hack
ilan-gold Oct 9, 2019
c8ec629
bump up the soft_time_limit
ilan-gold Oct 9, 2019
7761fe8
initial commit for restructuring galaxy requests
ilan-gold Oct 15, 2019
4022206
get dataset download list more directly from exposed_datasets
ilan-gold Oct 15, 2019
f10ad15
retrieve analysis download list from the AnalysisNodeConnection list
ilan-gold Oct 15, 2019
aa40889
update download_list to use AnalysisNodeConnection outputs
ilan-gold Oct 15, 2019
f7df2ec
remove cache fields on Tools model and re-use full galaxy dataset list
ilan-gold Oct 15, 2019
37b5c1c
import OUTPUT_CONNECTION
ilan-gold Oct 15, 2019
4523a82
remove force_request parameter
ilan-gold Oct 15, 2019
fbc5952
fix formatting on exposed_dataset_list parameter
ilan-gold Oct 15, 2019
c3f56ba
fix formatting
ilan-gold Oct 15, 2019
25403c9
add creating_job option to create output name to prevent repeat requests
ilan-gold Oct 16, 2019
2786cfa
revert use of rename s3 as separate task
ilan-gold Oct 16, 2019
62094f4
remove RenameS3FileTask task
ilan-gold Oct 16, 2019
2814e62
revert fabricrc.sample file changes
ilan-gold Oct 16, 2019
6a3ae90
fix tool_manager mocking to match new behavior
ilan-gold Oct 16, 2019
4df07bd
fix formatting on test
ilan-gold Oct 16, 2019
9482624
resolve ilya's requests
ilan-gold Oct 21, 2019
d3848d9
update field names
ilan-gold Oct 21, 2019
b056bae
update file extension in name
ilan-gold Oct 21, 2019
5dbd46b
remove check on invocation being None
ilan-gold Oct 21, 2019
f34312b
remove unneeded fields and fix tests
ilan-gold Oct 22, 2019
204dca4
reinsert "galaxy_dataset_name" to AnalysisNodeConnection
ilan-gold Oct 22, 2019
0a43ab5
remove unnecessary get_galaxy_dataset_download_list function and tests
ilan-gold Oct 22, 2019
1118741
handle galaxy error
ilan-gold Oct 22, 2019
4553883
update use of json.loads() and json.dumps() to be literal_eval with dict
ilan-gold Oct 22, 2019
63106a0
change back to ast.literal_eval from json.loads
ilan-gold Oct 22, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 11 additions & 7 deletions refinery/analysis_manager/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from celery.task.sets import TaskSet

import core
from core.models import Analysis, AnalysisResult, Workflow
from core.models import Analysis, AnalysisResult, Workflow, OUTPUT_CONNECTION
from file_store.models import FileStoreItem, FileExtension
from file_store.tasks import FileImportTask
import tool_manager
Expand Down Expand Up @@ -473,7 +473,11 @@ def _get_galaxy_download_task_ids(analysis):
tool.create_analysis_output_node_connections()
galaxy_instance = analysis.workflow.workflow_engine.instance
try:
download_list = tool.get_galaxy_dataset_download_list()
download_list = tool_manager.models.AnalysisNodeConnection.\
objects.filter(
is_refinery_file=True, analysis=analysis,
direction=OUTPUT_CONNECTION
)
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
except galaxy.client.ConnectionError as exc:
error_msg = \
"Error downloading Galaxy history files for analysis '%s': %s"
Expand All @@ -485,14 +489,14 @@ def _get_galaxy_download_task_ids(analysis):
# Iterating through files in current galaxy history
for results in download_list:
# download file if result state is "ok"
if results['state'] == 'ok':
file_extension = results["type"]
result_name = "{}.{}".format(results['name'], file_extension)
if results.state == 'ok':
file_extension = results.filetype
result_name = "{}".format(results.filename)
# size of file defined by galaxy
file_size = results['file_size']
file_size = results.file_size
file_store_item = FileStoreItem(source=urlparse.urljoin(
galaxy_instance.base_url,
"datasets/{}/display?to_ext=txt".format(results['dataset_id'])
"datasets/{}/display?to_ext=txt".format(results.dataset_id)
))
# workaround to set the correct file type for zip archives of
# FastQC HTML reports produced by Galaxy dynamically
Expand Down
2 changes: 1 addition & 1 deletion refinery/config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def get_setting(name, settings=local_settings, default=None):
CELERYD_MAX_TASKS_PER_CHILD = get_setting("CELERYD_MAX_TASKS_PER_CHILD")
CELERY_ROUTES = {"file_store.tasks.FileImportTask": {"queue": "file_import"}}
CELERY_ACCEPT_CONTENT = ['pickle']
CELERYD_TASK_SOFT_TIME_LIMIT = 60 # seconds
CELERYD_TASK_SOFT_TIME_LIMIT = 180 # seconds
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
CELERYBEAT_SCHEDULE = {
'collect_site_statistics': {
'task': 'core.tasks.collect_site_statistics',
Expand Down
29 changes: 29 additions & 0 deletions refinery/core/migrations/0038_auto_20191015_1423.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add Galaxy result metadata to AnalysisNodeConnection.

    Stores the dataset state, file size, and Galaxy dataset id directly
    on the connection so they can be reused without repeat API requests.
    """

    dependencies = [
        ('core', '0037_remove_analysisresult_analysis_uuid'),
    ]

    operations = [
        # Galaxy dataset identifier for the output file
        migrations.AddField(
            model_name='analysisnodeconnection',
            name='dataset_id',
            field=models.CharField(blank=True, null=True, max_length=100),
        ),
        # file size as reported by Galaxy
        migrations.AddField(
            model_name='analysisnodeconnection',
            name='file_size',
            field=models.IntegerField(blank=True, null=True),
        ),
        # dataset state reported by Galaxy (e.g. "ok")
        migrations.AddField(
            model_name='analysisnodeconnection',
            name='state',
            field=models.CharField(blank=True, null=True, max_length=100),
        ),
    ]
9 changes: 7 additions & 2 deletions refinery/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
'''
from __future__ import absolute_import

import ast
from collections import defaultdict
from datetime import datetime
import json
Expand Down Expand Up @@ -1067,7 +1066,7 @@ def __str__(self):

def get_expanded_workflow_graph(self):
return tool_manager.utils.create_expanded_workflow_graph(
ast.literal_eval(self.workflow_copy)
json.loads(self.workflow_copy)
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
)

def has_nodes_used_in_downstream_analyses(self):
Expand Down Expand Up @@ -1730,6 +1729,12 @@ class AnalysisNodeConnection(models.Model):
default=False)
galaxy_dataset_name = models.CharField(null=True, blank=True,
max_length=250)
# state from galaxy
state = models.CharField(null=True, blank=True, max_length=100)
# size of the file
file_size = models.IntegerField(null=True, blank=True)
# galaxy dataset id
dataset_id = models.CharField(null=True, blank=True, max_length=100)
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved

def __unicode__(self):
return "{}: {}_{} ({}) {}".format(
Expand Down
19 changes: 19 additions & 0 deletions refinery/tool_manager/migrations/0030_auto_20191007_0941.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add an ``invocation`` cache field to WorkflowTool.

    Holds the serialized Galaxy workflow-invocation response so the
    tool does not have to re-fetch it on every access.
    """

    dependencies = [
        ('tool_manager', '0029_parameter_uuid'),
    ]

    operations = [
        migrations.AddField(
            model_name='workflowtool',
            name='invocation',
            field=models.TextField(blank=True, null=True),
        ),
    ]
66 changes: 49 additions & 17 deletions refinery/tool_manager/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,7 @@ class WorkflowTool(Tool):
REVERSE = "reverse"
TOOL_ID = "tool_id"
WORKFLOW_OUTPUTS = "workflow_outputs"
invocation = models.TextField(null=True, blank=True)
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved

class Meta:
verbose_name = "workflowtool"
Expand Down Expand Up @@ -769,7 +770,7 @@ def _create_analysis(self):
self.set_analysis(analysis.uuid)

workflow_dict = self._get_workflow_dict()
self.analysis.workflow_copy = workflow_dict
self.analysis.workflow_copy = json.dumps(workflow_dict)
self.analysis.workflow_steps_num = len(workflow_dict["steps"].keys())
self.analysis.set_owner(self.get_owner())
self.analysis.save()
Expand All @@ -793,8 +794,11 @@ def create_analysis_output_node_connections(self):
"""Create the AnalysisNodeConnection objects corresponding to the
output Nodes (Derived Data) of a WorkflowTool launch
"""
exposed_workflow_outputs = self._get_exposed_galaxy_datasets()
for galaxy_dataset in self._get_galaxy_history_dataset_list():
exposed_dataset_list = self._get_galaxy_history_dataset_list()
exposed_workflow_outputs = self._get_exposed_galaxy_datasets(
exposed_dataset_list=exposed_dataset_list
)
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
for galaxy_dataset in exposed_dataset_list:
AnalysisNodeConnection.objects.create(
analysis=self.analysis, direction=OUTPUT_CONNECTION,
name=self._get_creating_job_output_name(galaxy_dataset),
Expand All @@ -803,7 +807,10 @@ def create_analysis_output_node_connections(self):
filename=self._get_galaxy_dataset_filename(galaxy_dataset),
filetype=galaxy_dataset["file_ext"],
is_refinery_file=galaxy_dataset in exposed_workflow_outputs,
galaxy_dataset_name=galaxy_dataset["name"]
galaxy_dataset_name=galaxy_dataset["name"],
state=galaxy_dataset['state'],
file_size=galaxy_dataset['file_size'],
dataset_id=galaxy_dataset['id']
)

def _create_collection_description(self):
Expand Down Expand Up @@ -1095,7 +1102,7 @@ def _get_galaxy_history_dataset_list(self):
]
return retained_datasets

def _get_exposed_galaxy_datasets(self):
def _get_exposed_galaxy_datasets(self, exposed_dataset_list=None):
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
"""
Retrieve all Galaxy Datasets that correspond to an asterisked
output in the Galaxy workflow editor.
Expand All @@ -1108,7 +1115,9 @@ def _get_exposed_galaxy_datasets(self):
explicitly exposed
"""
exposed_galaxy_datasets = []
for galaxy_dataset in self._get_galaxy_history_dataset_list():
if exposed_dataset_list is None:
exposed_dataset_list = self._get_galaxy_history_dataset_list()
for galaxy_dataset in exposed_dataset_list:
creating_job = self._get_galaxy_dataset_job(galaxy_dataset)

# `tool_id` corresponds to the descriptive name of a galaxy
Expand All @@ -1119,7 +1128,9 @@ def _get_exposed_galaxy_datasets(self):
)
workflow_steps_dict = self._get_workflow_dict()["steps"]
creating_job_output_name = (
self._get_creating_job_output_name(galaxy_dataset)
self._get_creating_job_output_name(
galaxy_dataset, creating_job
)
)
workflow_step_output_names = [
workflow_output["output_name"] for workflow_output in
Expand Down Expand Up @@ -1164,10 +1175,19 @@ def _get_galaxy_workflow_invocation(self):
"""
Fetch our Galaxy Workflow's invocation data.
"""
return self.galaxy_connection.workflows.show_invocation(
self.galaxy_workflow_history_id,
self.get_galaxy_dict()[self.GALAXY_WORKFLOW_INVOCATION_DATA]["id"]
)
# separate if-then assignment needed to avoid using the dict stored
# in self.invocation before .save() is called
if self.invocation == '' or self.invocation is None:
ilan-gold marked this conversation as resolved.
Show resolved Hide resolved
invocation = self.galaxy_connection.workflows.show_invocation(
self.galaxy_workflow_history_id,
self.get_galaxy_dict()
[self.GALAXY_WORKFLOW_INVOCATION_DATA]["id"]
)
self.invocation = json.dumps(invocation)
self.save()
else:
invocation = json.loads(self.invocation)
return invocation

@handle_bioblend_exceptions
def _get_refinery_input_file_id(self, galaxy_dataset_dict):
Expand Down Expand Up @@ -1248,9 +1268,19 @@ def _get_tool_inputs_dict(self, workflow_step):

@handle_bioblend_exceptions
def _get_workflow_dict(self):
return self.galaxy_connection.workflows.export_workflow_dict(
self.get_workflow_internal_id()
)
# separate if-then assignment needed to avoid using the dict stored
# in workflow_copy before .save() is called
if self.analysis.workflow_copy == '' \
or self.analysis.workflow_copy is None:
workflow_copy = \
self.galaxy_connection.workflows.export_workflow_dict(
self.get_workflow_internal_id()
)
self.analysis.workflow_copy = json.dumps(workflow_copy)
self.analysis.save()
else:
workflow_copy = json.loads(self.analysis.workflow_copy)
return workflow_copy

def get_workflow_internal_id(self):
return self.tool_definition.workflow.internal_id
Expand All @@ -1265,19 +1295,21 @@ def _get_workflow_step(self, galaxy_dataset_dict):
# `input` step i.e. `0`
return self.INPUT_STEP_NUMBER

def _get_creating_job_output_name(self, galaxy_dataset_dict):
def _get_creating_job_output_name(self, galaxy_dataset_dict,
creating_job=None):
"""
Retrieve the specified output name from the creating Galaxy Job that
corresponds to a Galaxy Dataset
:param galaxy_dataset_dict: dict containing information about a
Galaxy Dataset.

:param creating_job: an optional argument to prevent repeat request
This is useful if there are any post-job-actions in place to do
renaming of said output dataset.

:return: The proper output name of our galaxy dataset
"""
creating_job = self._get_galaxy_dataset_job(galaxy_dataset_dict)
if creating_job is None:
creating_job = self._get_galaxy_dataset_job(galaxy_dataset_dict)
creating_job_outputs = creating_job["outputs"]
workflow_step_output_name = [
output_name for output_name in creating_job_outputs.keys()
Expand Down