Skip to content

Commit

Permalink
Analysis refactor gui part7 (#2117)
Browse files Browse the repository at this point in the history
* fix #1505

* improving some GUI stuff

* improving some GUI stuff - missing lines

* addressing all comments

* ready for review

* fix #1987

* initial commit

* requested changes

* fix filter job list

* Fixing server cert (#2051)

* fix get_studies

* flake8

* fix #503

* fix #2010

* fix #1913

* fix errors

* addressing @josenavas comment

* flake8

* fix #1010

* fix #1066 (#2058)

* addressing @josenavas comments

* fix #1961

* fix #1837

* Automatic jobs & new stats (#2057)

* fix #814, fix #1636

* fixing error in test-env

* fixing stats.html call

* adding img

* addressing @josenavas comments

* rm for loops

* addressing @ElDeveloper comments

* generalizing this functionality

* fix #1816

* fix #1959

* addressing @josenavas comments

* addressing @josenavas comments

* fixing error

* fixed?

* addressing @josenavas comments

* addressing @wasade comments

* fix flake8

* generate biom and metadata release (#2066)

* initial commit

* adding portal

* addressing @josenavas comments

* pid -> qiita_artifact_id

* addressing @josenavas comments

* addressing @ElDeveloper comments

* rm 50.sql

* database changes to fix 969

* adding delete

* addressing @josenavas comments

* addressing @ElDeveloper comments

* duh!

* fix generate_biom_and_metadata_release (#2072)

* fix generate_biom_and_metadata_release

* addressing @ElDeveloper comment

* Removing qiita ware code that will not be used anymore

* Organizing the handlers and new analysis description page

* fixing timestamp

* rm formats

* st -> pt

* Connecting the analysis creation and making interface responsive

* Addressing @antgonza's comments

* Initial artifact GUI refactor

* Removing unused code

* moving to ISO 8601 - wow :'(

* fix errors

* addressing @wasade comments

* Adding can_edit call to the analysis

* Fixing artifact rest API since not all artifacts have study

* Adding can_be_publicized call to analysis

* Adding QiitaHTTPError to handle errors gracefully

* Adding safe_execution contextmanager

* Fixing typo

* Adding qiita test checker

* Adapting some artifact handlers

* Abstracting the graph reloading and adding some documentation

* Fixing typo

* Fixing changing artifact visibility

* Fixing delete

* Fixing artifact deletion

* Adding default parameters to the commands

* Fixing processing page

* Fixing variable name

* fixing private/public studies

* Changing bdiv metrics to single choice

* sanbox-to-sandbox

* flake8

* Fixing patch

* fixing other issues

* adding share documentation

* psycopg2 <= 2.7

* psycopg2 < 2.7

* Various small fixes to be able to run tests on the plugins

* Adding private module

* Fixing processing job completion

* Fixing patch 52

* Fixing call

* Fixing complete

* small fixes

* Adding processing handlers

* Fixing url and bug on processing job workflow

* Adding the private script runner

* Adding is_analysis column to the command

* Adding retrieval of commands excluding analysis commands

* Addressing bug on retrieving information from redis

* Enabling the command register endpoint to provide if the command is analysis only

* Addressing @antgonza's comments

* Addressing @wasade's comments
  • Loading branch information
josenavas authored and antgonza committed May 17, 2017
1 parent 7cf4559 commit 39a03e3
Show file tree
Hide file tree
Showing 18 changed files with 809 additions and 122 deletions.
4 changes: 3 additions & 1 deletion qiita_db/handlers/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,14 @@ def post(self, name, version):
if outputs:
outputs = loads(outputs)
dflt_param_set = loads(self.get_argument('default_parameter_sets'))
analysis_only = self.get_argument('analysis_only', False)

parameters = req_params
parameters.update(opt_params)

cmd = qdb.software.Command.create(
plugin, cmd_name, cmd_desc, parameters, outputs)
plugin, cmd_name, cmd_desc, parameters, outputs,
analysis_only=analysis_only)

if dflt_param_set is not None:
for name, vals in dflt_param_set.items():
Expand Down
19 changes: 19 additions & 0 deletions qiita_db/handlers/tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,25 @@ def test_post(self):
self.assertEqual(obs.code, 200)
obs = _get_command('QIIME', '1.9.1', 'New Command')
self.assertEqual(obs.name, 'New Command')
self.assertFalse(obs.analysis_only)

# Create a new command that is analysis only
data = {
'name': 'New analysis command',
'description': 'Analysis command added for testing',
'required_parameters': dumps(
{'in_data': ['artifact:["BIOM"]', None]}),
'optional_parameters': dumps({'param1': ['string', 'default']}),
'outputs': dumps({'outtable': 'BIOM'}),
'default_parameter_sets': dumps({'dflt1': {'param1': 'test'}}),
'analysis_only': True
}
obs = self.post('/qiita_db/plugins/QIIME/1.9.1/commands/', data=data,
headers=self.header)
self.assertEqual(obs.code, 200)
obs = _get_command('QIIME', '1.9.1', 'New analysis command')
self.assertEqual(obs.name, 'New analysis command')
self.assertTrue(obs.analysis_only)


class CommandHandlerTests(OauthTestingBase):
Expand Down
74 changes: 74 additions & 0 deletions qiita_db/private.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from json import dumps
from sys import exc_info
from time import sleep
import traceback

import qiita_db as qdb


def build_analysis_files(job):
    """Build the BIOM files for an analysis and launch their validation.

    Parameters
    ----------
    job : qiita_db.processing_job.ProcessingJob
        The processing job with the information for building the files
    """
    with qdb.sql_connection.TRN:
        job_params = job.parameters.values
        a_id = job_params['analysis']
        merge_dups = job_params['merge_dup_sample_ids']
        analysis = qdb.analysis.Analysis(a_id)

        # Generate the per-data-type BIOM tables for the analysis
        bioms = analysis.build_files(merge_dups)

        # Each generated table is handed to the BIOM validator as its own job
        validator_cmd = qdb.software.Command.get_validator('BIOM')
        validator_jobs = []
        for data_type, fp in bioms:
            params = qdb.software.Parameters.load(
                validator_cmd,
                values_dict={'files': dumps({'biom': [fp]}),
                             'artifact_type': 'BIOM',
                             'provenance': dumps({'job': job.id,
                                                  'data_type': data_type}),
                             'analysis': a_id})
            validator_jobs.append(qdb.processing_job.ProcessingJob.create(
                analysis.owner, params))

        # Link the validators to this job so completion can be tracked
        job._set_validator_jobs(validator_jobs)

    # Submit outside the transaction; the brief pause spaces out submissions
    for vjob in validator_jobs:
        vjob.submit()
        sleep(1)


# Dispatch table mapping a Qiita-internal command name to the function that
# executes it; private_task looks up the job's command name here
TASK_DICT = {'build_analysis_files': build_analysis_files}


def private_task(job_id):
    """Completes a Qiita private task.

    Looks up the job's command in ``TASK_DICT`` and runs it; any exception
    raised by the task is recorded as the job's error so the job is marked
    as failed instead of the worker crashing.

    Parameters
    ----------
    job_id : str
        The job id, or the literal string 'register' during plugin
        registration (in which case nothing is done)
    """
    if job_id == 'register':
        # We don't need to do anything here if Qiita is registering plugins
        return

    job = qdb.processing_job.ProcessingJob(job_id)
    job.update_heartbeat_state()
    task_name = job.command.name

    try:
        TASK_DICT[task_name](job)
    except Exception:
        # format_exception returns a list of strings; join it so the stored
        # error is a readable traceback rather than the repr of a list
        job.complete(False, error="Error executing private task: %s"
                     % ''.join(traceback.format_exception(*exc_info())))
27 changes: 24 additions & 3 deletions qiita_db/processing_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,8 @@ def _complete_artifact_definition(self, artifact_data):
else:
# The artifact is uploaded by the user or is the initial
# artifact of an analysis
if job_params['analysis'] is not None:
if ('analysis' in job_params and
job_params['analysis'] is not None):
pt = None
an = qdb.analysis.Analysis(job_params['analysis'])
sql = """SELECT data_type
Expand Down Expand Up @@ -567,11 +568,21 @@ def _complete_artifact_transformation(self, artifacts_data):
templates = set()
for artifact in self.input_artifacts:
templates.update(pt.id for pt in artifact.prep_templates)
template = None
analysis = None
if len(templates) > 1:
raise qdb.exceptions.QiitaDBError(
"Currently only single prep template "
"is allowed, found %d" % len(templates))
template = templates.pop()
elif len(templates) == 1:
template = templates.pop()
else:
# In this case we have 0 templates. What this means is that
# this artifact is being generated in the analysis pipeline
# All the artifacts included in the analysis pipeline
# belong to the same analysis, so we can just ask the
# first artifact for the analysis that it belongs to
analysis = self.input_artifacts[0].analysis.id

# Once the validate job completes, it needs to know if it has
# been generated from a command (and how) or if it has been
Expand All @@ -592,6 +603,7 @@ def _complete_artifact_transformation(self, artifacts_data):
cmd, values_dict={'files': dumps(filepaths),
'artifact_type': atype,
'template': template,
'analysis': analysis,
'provenance': dumps(provenance)})
validator_jobs.append(
ProcessingJob.create(self.user, validate_params))
Expand Down Expand Up @@ -1196,7 +1208,16 @@ def _raise_if_not_in_construction(self):
WHERE processing_job_workflow_id = %s"""
qdb.sql_connection.TRN.add(sql, [self.id])
res = qdb.sql_connection.TRN.execute_fetchflatten()
if len(res) != 1 or res[0] != 'in_construction':
# If the above SQL query returns a single element and the value
# is different from in construction, it means that all the jobs
# in the workflow are in the same status and it is not
# 'in_construction', hence raise the error. If the above SQL query
# returns more than value (len(res) > 1) it means that the workflow
# is no longer in construction cause some jobs have been submited
# for processing. Note that if the above query doesn't retrun any
# value, it means that no jobs are in the workflow and that means
# that the workflow is in construction.
if (len(res) == 1 and res[0] != 'in_construction') or len(res) > 1:
# The workflow is no longer in construction, raise an error
raise qdb.exceptions.QiitaDBOperationNotPermittedError(
"Workflow not in construction")
Expand Down
33 changes: 28 additions & 5 deletions qiita_db/software.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ class Command(qdb.base.QiitaObject):
_table = "software_command"

@classmethod
def get_commands_by_input_type(cls, artifact_types, active_only=True):
def get_commands_by_input_type(cls, artifact_types, active_only=True,
exclude_analysis=True):
"""Returns the commands that can process the given artifact types
Parameters
Expand All @@ -70,6 +71,8 @@ def get_commands_by_input_type(cls, artifact_types, active_only=True):
WHERE artifact_type IN %s"""
if active_only:
sql += " AND active = True"
if exclude_analysis:
sql += " AND is_analysis = False"
qdb.sql_connection.TRN.add(sql, [tuple(artifact_types)])
for c_id in qdb.sql_connection.TRN.execute_fetchflatten():
yield cls(c_id)
Expand Down Expand Up @@ -191,7 +194,8 @@ def exists(cls, software, name):
return qdb.sql_connection.TRN.execute_fetchlast()

@classmethod
def create(cls, software, name, description, parameters, outputs=None):
def create(cls, software, name, description, parameters, outputs=None,
analysis_only=False):
r"""Creates a new command in the system
The supported types for the parameters are:
Expand Down Expand Up @@ -222,6 +226,9 @@ def create(cls, software, name, description, parameters, outputs=None):
outputs : dict, optional
The description of the outputs that this command generated. The
format is: {output_name: artifact_type}
analysis_only : bool, optional
If true, then the command will only be available on the analysis
pipeline. Default: False.
Returns
-------
Expand Down Expand Up @@ -297,10 +304,10 @@ def create(cls, software, name, description, parameters, outputs=None):
% (software.id, name))
# Add the command to the DB
sql = """INSERT INTO qiita.software_command
(name, software_id, description)
VALUES (%s, %s, %s)
(name, software_id, description, is_analysis)
VALUES (%s, %s, %s, %s)
RETURNING command_id"""
sql_params = [name, software.id, description]
sql_params = [name, software.id, description, analysis_only]
qdb.sql_connection.TRN.add(sql, sql_params)
c_id = qdb.sql_connection.TRN.execute_fetchlast()

Expand Down Expand Up @@ -508,6 +515,22 @@ def activate(self):
qdb.sql_connection.TRN.add(sql, [True, self.id])
return qdb.sql_connection.TRN.execute()

@property
def analysis_only(self):
"""Returns if the command is an analysis-only command
Returns
-------
bool
Whether the command is analysis only or not
"""
with qdb.sql_connection.TRN:
sql = """SELECT is_analysis
FROM qiita.software_command
WHERE command_id = %s"""
qdb.sql_connection.TRN.add(sql, [self.id])
return qdb.sql_connection.TRN.execute_fetchlast()


class Software(qdb.base.QiitaObject):
r"""A software package available in the system
Expand Down
7 changes: 6 additions & 1 deletion qiita_db/support_files/patches/52.sql
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ ALTER TABLE qiita.analysis ADD logging_id bigint ;
CREATE INDEX idx_analysis_0 ON qiita.analysis ( logging_id ) ;
ALTER TABLE qiita.analysis ADD CONSTRAINT fk_analysis_logging FOREIGN KEY ( logging_id ) REFERENCES qiita.logging( logging_id ) ;

-- Alter the software command table to differentiate between commands that
-- apply to the analysis pipeline or commands that apply on the study
-- processing pipeline
ALTER TABLE qiita.software_command ADD is_analysis bool DEFAULT 'False' NOT NULL;

-- We can handle some of the special cases here, so we simplify the work in the
-- python patch

Expand Down Expand Up @@ -102,7 +107,7 @@ DECLARE
baf_cmd_id bigint;
BEGIN
INSERT INTO qiita.software (name, version, description, environment_script, start_script, software_type_id, active)
VALUES ('Qiita', 'alpha', 'Internal Qiita jobs', 'source activate qiita', 'qiita-private-2', 3, True)
VALUES ('Qiita', 'alpha', 'Internal Qiita jobs', 'source activate qiita', 'qiita-private-plugin', 3, True)
RETURNING software_id INTO qiita_sw_id;

INSERT INTO qiita.software_command (software_id, name, description)
Expand Down
13 changes: 9 additions & 4 deletions qiita_db/support_files/patches/python_patches/52.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def create_non_rarefied_biom_artifact(analysis, biom_data, rarefied_table):
# Note that we are sure that the biom table exists for sure, so
# no need to check if biom_fp is undefined
biom_table = load_table(biom_fp)
samples = set(samples).intersection(biom_table.ids())
biom_table.filter(samples, axis='sample', inplace=True)
new_table = new_table.merge(biom_table)
ids_map.update({sid: "%d.%s" % (a_id, sid)
Expand Down Expand Up @@ -498,8 +499,9 @@ def transfer_job(analysis, command_id, params, input_artifact_id, job_data,
qiime_id = TRN.execute_fetchlast()

# Step 2: Insert the new commands in the software_command table
sql = """INSERT INTO qiita.software_command (software_id, name, description)
VALUES (%s, %s, %s)
sql = """INSERT INTO qiita.software_command
(software_id, name, description, is_analysis)
VALUES (%s, %s, %s, TRUE)
RETURNING command_id"""
TRN.add(sql, [qiime_id, 'Summarize Taxa', 'Plots taxonomy summaries at '
'different taxonomy levels'])
Expand Down Expand Up @@ -606,7 +608,7 @@ def transfer_job(analysis, command_id, params, input_artifact_id, job_data,
[sum_taxa_cmd_id, 'Defaults',
'{"sort": false, "metadata_category": ""}'],
[bdiv_cmd_id, 'Unweighted UniFrac',
'{"metrics": "unweighted_unifrac", "tree": ""}'],
'{"metric": "unweighted_unifrac", "tree": ""}'],
[arare_cmd_id, 'Defaults',
'{"max_rare_depth": "Default", "tree": "", "num_steps": 10, '
'"min_rare_depth": 10, "metrics": ["chao1", "observed_otus"]}'],
Expand Down Expand Up @@ -669,7 +671,10 @@ def transfer_job(analysis, command_id, params, input_artifact_id, job_data,
srare_cmd_out_id)
else:
# The BIOM table was not rarefied, use current table as initial
initial_biom_id = transfer_file_to_artifact()
initial_biom_id = transfer_file_to_artifact(
analysis['analysis_id'], analysis['timestamp'], None,
biom_data['data_type_id'], None, 7,
biom_data['filepath_id'])

# Loop through all the jobs that used this biom table as input
sql = """SELECT *
Expand Down
Loading

0 comments on commit 39a03e3

Please sign in to comment.