diff --git a/.travis.yml b/.travis.yml index 5a191929f..d7b5b0555 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,9 @@ env: global: - PYTHON_VERSION=2.7 matrix: - - TEST_ADD_STUDIES=False + - TEST_ADD_STUDIES=False COVER_PACKAGE=qiita_db + - TEST_ADD_STUDIES=False COVER_PACKAGE=qiita_pet + - TEST_ADD_STUDIES=False COVER_PACKAGE="qiita_core qiita_ware" - TEST_ADD_STUDIES=True before_install: - redis-server --version @@ -18,7 +20,9 @@ before_install: - wget ftp://ftp.microbio.me/pub/qiita/ascp-install-3.5.4.102989-linux-64-qiita.sh -O ascp-install-3.5.4.102989-linux-64-qiita.sh - chmod +x ascp-install-3.5.4.102989-linux-64-qiita.sh - ./ascp-install-3.5.4.102989-linux-64-qiita.sh - - if [ ${TRAVIS_PULL_REQUEST} == "false" ]; then openssl aes-256-cbc -K $encrypted_a2e23aea5f14_key -iv $encrypted_a2e23aea5f14_iv -in qiita_core/support_files/config_test_travis.cfg.enc -out qiita_core/support_files/config_test_travis.cfg -d ; fi + # once we have ebi testing we should uncomment this line + # look for EBI below as it's part of this + # - if [ ${TRAVIS_PULL_REQUEST} == "false" ]; then openssl aes-256-cbc -K $encrypted_a2e23aea5f14_key -iv $encrypted_a2e23aea5f14_iv -in qiita_core/support_files/config_test_travis.cfg.enc -out qiita_core/support_files/config_test_travis.cfg -d ; fi install: # install a few of the dependencies that pip would otherwise try to install # when intalling scikit-bio @@ -27,22 +31,37 @@ install: 'pandas>=0.18' 'matplotlib>=1.1.0' 'scipy>0.13.0' 'numpy>=1.7' 'h5py>=2.3.1' - source activate qiita - pip install -U pip - - pip install sphinx sphinx-bootstrap-theme coveralls 'ipython[all]==2.4.1' + - pip install sphinx sphinx-bootstrap-theme 'ipython[all]==2.4.1' nose-timer codecov - travis_retry pip install . --process-dependency-links - 'echo "backend: Agg" > matplotlibrc' -script: + # Install the biom plugin so we can run the analysis tests + - pip install https://github.com/qiita-spots/qiita_client/archive/master.zip + - pip install https://github.com/qiita-spots/qtp-biom/archive/master.zip --process-dependency-links + - export QIITA_SERVER_CERT=`pwd`/qiita_core/support_files/server.crt + - mkdir ~/.qiita_plugins + - cp $PWD/qiita_core/support_files/BIOM\ type_2.1.4.conf ~/.qiita_plugins +before_script: + # Some of the tests rely on the plugin system to complete successfully. + # Thus, we need a qiita webserver running to be able to execute the tests. 
- export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test.cfg - - if [ ${TRAVIS_PULL_REQUEST} == "false" ]; then - export QIITA_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg; - export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg; - fi + # EBI, see the end of before_install about why this block is commented out + # - if [ ${TRAVIS_PULL_REQUEST} == "false" ]; then + # export QIITA_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg; + # export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg; + # fi - ipython profile create qiita-general --parallel - qiita-env start_cluster qiita-general - qiita-env make --no-load-ontologies + - | + if [ ${TEST_ADD_STUDIES} == "False" ]; then + qiita pet webserver --no-build-docs start & + fi +script: + - sleep 5 - if [ ${TEST_ADD_STUDIES} == "True" ]; then test_data_studies/commands.sh ; fi - if [ ${TEST_ADD_STUDIES} == "True" ]; then qiita-cron-job ; fi - if [ ${TEST_ADD_STUDIES} == "False" ]; then qiita-test-install ; fi - - if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage -v --cover-package=qiita_db,qiita_pet,qiita_core,qiita_ware; fi + - if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests $COVER_PACKAGE --with-doctest --with-coverage --with-timer -v --cover-package=$COVER_PACKAGE; fi - flake8 qiita_* setup.py scripts/* - ls -R /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_pet/support_files/doc/ - qiita pet webserver @@ -52,4 +71,4 @@ services: - redis-server - postgresql after_success: - - if [ ${TEST_ADD_STUDIES} == "False" ]; then coveralls ; fi + - if [ ${TEST_ADD_STUDIES} == "False" ]; then codecov ; fi diff --git a/README.rst b/README.rst index bf3213ab6..770e2f3ec 100644 --- a/README.rst +++ b/README.rst @@ -80,7 +80,7 @@ future. .. |Build Status| image:: https://travis-ci.org/biocore/qiita.png?branch=master :target: https://travis-ci.org/biocore/qiita -.. |Coverage Status| image:: https://coveralls.io/repos/biocore/qiita/badge.png?branch=master - :target: https://coveralls.io/r/biocore/qiita +.. |Coverage Status| image:: https://codecov.io/gh/biocore/qiita/branch/master/graph/badge.svg + :target: https://codecov.io/gh/biocore/qiita .. 
|Gitter| image:: https://badges.gitter.im/Join%20Chat.svg :target: https://gitter.im/biocore/qiita?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge diff --git a/qiita_core/support_files/BIOM type_2.1.4.conf b/qiita_core/support_files/BIOM type_2.1.4.conf new file mode 100644 index 000000000..d3eb040b4 --- /dev/null +++ b/qiita_core/support_files/BIOM type_2.1.4.conf @@ -0,0 +1,13 @@ +[main] +NAME = BIOM type +VERSION = 2.1.4 +DESCRIPTION = The Biological Observation Matrix format +ENVIRONMENT_SCRIPT = source activate qtp-biom +START_SCRIPT = start_biom +PLUGIN_TYPE = artifact definition +PUBLICATIONS = + +[oauth2] +SERVER_CERT = /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_core/support_files/server.crt +CLIENT_ID = dHgaXDwq665ksFPqfIoD3Jt8KRXdSioTRa4lGa5mGDnz6JTIBf +CLIENT_SECRET = xqx61SD4M2EWbaS0WYv3H1nIemkvEAMIn16XMLjy5rTCqi7opCcWbfLINEwtV48bQ diff --git a/qiita_core/support_files/config_test.cfg b/qiita_core/support_files/config_test.cfg index 0e91b9176..aeb53ef6e 100644 --- a/qiita_core/support_files/config_test.cfg +++ b/qiita_core/support_files/config_test.cfg @@ -23,19 +23,19 @@ LOG_DIR = REQUIRE_APPROVAL = True # Base URL: DO NOT ADD TRAILING SLASH -BASE_URL = https://localhost +BASE_URL = https://localhost:21174 # Download path files -UPLOAD_DATA_DIR = /tmp/ +UPLOAD_DATA_DIR = /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_db/support_files/test_data/uploads/ # Working directory path -WORKING_DIR = /tmp/ +WORKING_DIR = /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_db/support_files/test_data/working_dir/ # Maximum upload size (in Gb) MAX_UPLOAD_SIZE = 100 # Path to the base directory where the data files are going to be stored -BASE_DATA_DIR = +BASE_DATA_DIR = /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_db/support_files/test_data/ # Valid upload extension, comma separated. 
Empty for no uploads VALID_UPLOAD_EXTENSION = fastq,fastq.gz,txt,tsv,sff,fna,qual diff --git a/qiita_core/testing.py b/qiita_core/testing.py index e05f67a69..22bc9201c 100644 --- a/qiita_core/testing.py +++ b/qiita_core/testing.py @@ -43,9 +43,9 @@ def wait_for_prep_information_job(prep_id, raise_if_none=True): else: redis_info = loads(r_client.get(job_id)) while redis_info['status_msg'] == 'Running': - sleep(0.05) + sleep(0.5) redis_info = loads(r_client.get(job_id)) - sleep(0.05) + sleep(0.5) def wait_for_processing_job(job_id): @@ -58,5 +58,5 @@ def wait_for_processing_job(job_id): """ job = ProcessingJob(job_id) while job.status not in ('success', 'error'): - sleep(0.05) - sleep(0.5) + sleep(1.2) + sleep(1.2) diff --git a/qiita_db/__init__.py b/qiita_db/__init__.py index e606bfb83..e9e71d654 100644 --- a/qiita_db/__init__.py +++ b/qiita_db/__init__.py @@ -16,7 +16,6 @@ import environment_manager import exceptions import investigation -import job import logger import meta_util import ontology @@ -27,11 +26,12 @@ import study import user import processing_job +import private __version__ = "0.2.0-dev" __all__ = ["analysis", "artifact", "base", "commands", "environment_manager", - "exceptions", "investigation", "job", "logger", "meta_util", + "exceptions", "investigation", "logger", "meta_util", "ontology", "portal", "reference", "search", "software", "sql_connection", "study", "user", "util", - "metadata_template", "processing_job"] + "metadata_template", "processing_job", "private"] diff --git a/qiita_db/analysis.py b/qiita_db/analysis.py index d4d957429..aa66f9f39 100644 --- a/qiita_db/analysis.py +++ b/qiita_db/analysis.py @@ -18,12 +18,12 @@ # ----------------------------------------------------------------------------- from __future__ import division from itertools import product -from os.path import join, basename -from tarfile import open as taropen +from os.path import join from future.utils import viewitems from biom import load_table from biom.util import biom_open +from re import sub import pandas as pd from qiita_core.exceptions import IncompetentQiitaDeveloperError @@ -31,7 +31,7 @@ import qiita_db as qdb -class Analysis(qdb.base.QiitaStatusObject): +class Analysis(qdb.base.QiitaObject): """ Analysis object to access to the Qiita Analysis information @@ -41,15 +41,11 @@ class Analysis(qdb.base.QiitaStatusObject): name description samples - dropped_samples data_types - biom_tables - step + artifacts shared_with jobs pmid - parent - children Methods ------- @@ -63,25 +59,14 @@ class Analysis(qdb.base.QiitaStatusObject): exists create delete + add_artifact + set_error """ _table = "analysis" _portal_table = "analysis_portal" _analysis_id_column = 'analysis_id' - def _lock_check(self): - """Raises QiitaDBStatusError if analysis is not in_progress""" - if self.check_status({"queued", "running", "public", "completed", - "error"}): - raise qdb.exceptions.QiitaDBStatusError("Analysis is locked!") - - def _status_setter_checks(self): - r"""Perform a check to make sure not setting status away from public - """ - if self.check_status({"public"}): - raise qdb.exceptions.QiitaDBStatusError( - "Can't set status away from public!") - @classmethod def get_by_status(cls, status): """Returns all Analyses with given status @@ -97,19 +82,34 @@ def get_by_status(cls, status): All analyses in the database with the given status """ with qdb.sql_connection.TRN: - sql = """SELECT analysis_id - FROM qiita.{0} - JOIN qiita.{0}_status USING (analysis_status_id) - JOIN qiita.analysis_portal USING 
(analysis_id) - JOIN qiita.portal_type USING (portal_type_id) - WHERE status = %s AND portal = %s""".format(cls._table) - qdb.sql_connection.TRN.add(sql, [status, qiita_config.portal]) + # Sandboxed analyses are the analyses that have not been started + # and hence they don't have an artifact yet + if status == 'sandbox': + sql = """SELECT DISTINCT analysis_id + FROM qiita.analysis + JOIN qiita.analysis_portal USING (analysis_id) + JOIN qiita.portal_type USING (portal_type_id) + WHERE portal = %s AND analysis_id NOT IN ( + SELECT analysis_id + FROM qiita.analysis_artifact)""" + qdb.sql_connection.TRN.add(sql, [qiita_config.portal]) + else: + sql = """SELECT DISTINCT analysis_id + FROM qiita.analysis_artifact + JOIN qiita.artifact USING (artifact_id) + JOIN qiita.visibility USING (visibility_id) + JOIN qiita.analysis_portal USING (analysis_id) + JOIN qiita.portal_type USING (portal_type_id) + WHERE visibility = %s AND portal = %s""" + qdb.sql_connection.TRN.add(sql, [status, qiita_config.portal]) + return set( cls(aid) for aid in qdb.sql_connection.TRN.execute_fetchflatten()) @classmethod - def create(cls, owner, name, description, parent=None, from_default=False): + def create(cls, owner, name, description, from_default=False, + merge_duplicated_sample_ids=False): """Creates a new analysis on the database Parameters ---------- @@ -120,49 +120,39 @@ def create(cls, owner, name, description, parent=None, from_default=False): Name of the analysis description : str Description of the analysis - parent : Analysis object, optional - The analysis this one was forked from from_default : bool, optional If True, use the default analysis to populate selected samples. Default False. + merge_duplicated_sample_ids : bool, optional + If the duplicated sample ids in the selected studies should be + merged or prepended with the artifact ids.
False (default) prepends + the artifact id + + Returns + ------- + qdb.analysis.Analysis + The newly created analysis """ with qdb.sql_connection.TRN: - status_id = qdb.util.convert_to_id( - 'in_construction', 'analysis_status', 'status') portal_id = qdb.util.convert_to_id( qiita_config.portal, 'portal_type', 'portal') + # Create the row in the analysis table + sql = """INSERT INTO qiita.{0} + (email, name, description) + VALUES (%s, %s, %s) + RETURNING analysis_id""".format(cls._table) + qdb.sql_connection.TRN.add( + sql, [owner.id, name, description]) + a_id = qdb.sql_connection.TRN.execute_fetchlast() + if from_default: - # insert analysis and move samples into that new analysis + # Move samples into that new analysis dflt_id = owner.default_analysis.id - - sql = """INSERT INTO qiita.{0} - (email, name, description, analysis_status_id) - VALUES (%s, %s, %s, %s) - RETURNING analysis_id""".format(cls._table) - qdb.sql_connection.TRN.add( - sql, [owner.id, name, description, status_id]) - a_id = qdb.sql_connection.TRN.execute_fetchlast() - # MAGIC NUMBER 3: command selection step - # needed so we skip the sample selection step - sql = """INSERT INTO qiita.analysis_workflow - (analysis_id, step) - VALUES (%s, %s)""" - qdb.sql_connection.TRN.add(sql, [a_id, 3]) - sql = """UPDATE qiita.analysis_sample SET analysis_id = %s WHERE analysis_id = %s""" qdb.sql_connection.TRN.add(sql, [a_id, dflt_id]) - else: - # insert analysis information into table as "in construction" - sql = """INSERT INTO qiita.{0} - (email, name, description, analysis_status_id) - VALUES (%s, %s, %s, %s) - RETURNING analysis_id""".format(cls._table) - qdb.sql_connection.TRN.add( - sql, [owner.id, name, description, status_id]) - a_id = qdb.sql_connection.TRN.execute_fetchlast() # Add to both QIITA and given portal (if not QIITA) sql = """INSERT INTO qiita.analysis_portal @@ -176,14 +166,28 @@ def create(cls, owner, name, description, parent=None, from_default=False): args.append([a_id, qp_id]) qdb.sql_connection.TRN.add(sql, args, many=True) - # add parent if necessary - if parent: - sql = """INSERT INTO qiita.analysis_chain - (parent_id, child_id) - VALUES (%s, %s)""" - qdb.sql_connection.TRN.add(sql, [parent.id, a_id]) + instance = cls(a_id) + + # Once the analysis is created, we can create the mapping file and + # the initial set of artifacts + plugin = qdb.software.Software.from_name_and_version( + 'Qiita', 'alpha') + cmd = plugin.get_command('build_analysis_files') + params = qdb.software.Parameters.load( + cmd, values_dict={ + 'analysis': a_id, + 'merge_dup_sample_ids': merge_duplicated_sample_ids}) + job = qdb.processing_job.ProcessingJob.create( + owner, params) + sql = """INSERT INTO qiita.analysis_processing_job + (analysis_id, processing_job_id) + VALUES (%s, %s)""" + qdb.sql_connection.TRN.add(sql, [a_id, job.id]) + qdb.sql_connection.TRN.execute() - return cls(a_id) + # Doing the submission outside of the transaction + job.submit() + return instance @classmethod def delete(cls, _id): @@ -204,15 +208,21 @@ def delete(cls, _id): if not cls.exists(_id): raise qdb.exceptions.QiitaDBUnknownIDError(_id, "analysis") + # Check if the analysis has any artifact + sql = """SELECT EXISTS(SELECT * + FROM qiita.analysis_artifact + WHERE analysis_id = %s)""" + qdb.sql_connection.TRN.add(sql, [_id]) + if qdb.sql_connection.TRN.execute_fetchlast(): + raise qdb.exceptions.QiitaDBOperationNotPermittedError( + "Can't delete analysis %d, has artifacts attached" + % _id) + sql = "DELETE FROM qiita.analysis_filepath WHERE {0} = 
%s".format( cls._analysis_id_column) args = [_id] qdb.sql_connection.TRN.add(sql, args) - sql = "DELETE FROM qiita.analysis_workflow WHERE {0} = %s".format( - cls._analysis_id_column) - qdb.sql_connection.TRN.add(sql, args) - sql = "DELETE FROM qiita.analysis_portal WHERE {0} = %s".format( cls._analysis_id_column) qdb.sql_connection.TRN.add(sql, args) @@ -221,7 +231,7 @@ def delete(cls, _id): cls._analysis_id_column) qdb.sql_connection.TRN.add(sql, args) - sql = """DELETE FROM qiita.collection_analysis + sql = """DELETE FROM qiita.analysis_processing_job WHERE {0} = %s""".format(cls._analysis_id_column) qdb.sql_connection.TRN.add(sql, args) @@ -259,7 +269,6 @@ def exists(cls, analysis_id): qdb.sql_connection.TRN.add(sql, [analysis_id, qiita_config.portal]) return qdb.sql_connection.TRN.execute_fetchlast() - # ---- Properties ---- @property def owner(self): """The owner of the analysis @@ -346,7 +355,6 @@ def description(self, description): Analysis is public """ with qdb.sql_connection.TRN: - self._lock_check() sql = """UPDATE qiita.{0} SET description = %s WHERE analysis_id = %s""".format(self._table) qdb.sql_connection.TRN.add(sql, [description, self._id]) @@ -370,34 +378,6 @@ def samples(self): qdb.sql_connection.TRN.add(sql, [self._id]) return dict(qdb.sql_connection.TRN.execute_fetchindex()) - @property - def dropped_samples(self): - """The samples that were selected but dropped in processing - - Returns - ------- - dict of sets - Format is {artifact_id: {sample_id, sample_id, ...}, ...} - """ - with qdb.sql_connection.TRN: - bioms = self.biom_tables - if not bioms: - return {} - - # get all samples selected for the analysis, converting lists to - # sets for fast searching. Overhead less this way - # for large analyses - all_samples = {k: set(v) for k, v in viewitems(self.samples)} - - for biom, filepath in viewitems(bioms): - table = load_table(filepath) - ids = set(table.ids()) - for k in all_samples: - all_samples[k] = all_samples[k] - ids - - # what's left are unprocessed samples, so return - return all_samples - @property def data_types(self): """Returns all data types used in the analysis @@ -434,57 +414,14 @@ def shared_with(self): for uid in qdb.sql_connection.TRN.execute_fetchflatten()] @property - def all_associated_filepath_ids(self): - """Get all associated filepath_ids - - Returns - ------- - list - """ + def artifacts(self): with qdb.sql_connection.TRN: - sql = """SELECT filepath_id - FROM qiita.filepath - JOIN qiita.analysis_filepath USING (filepath_id) + sql = """SELECT artifact_id + FROM qiita.analysis_artifact WHERE analysis_id = %s""" - qdb.sql_connection.TRN.add(sql, [self._id]) - filepaths = set(qdb.sql_connection.TRN.execute_fetchflatten()) - - sql = """SELECT filepath_id - FROM qiita.analysis_job - JOIN qiita.job USING (job_id) - JOIN qiita.job_results_filepath USING (job_id) - JOIN qiita.filepath USING (filepath_id) - WHERE analysis_id = %s""" - qdb.sql_connection.TRN.add(sql, [self._id]) - return filepaths.union( - qdb.sql_connection.TRN.execute_fetchflatten()) - - @property - def biom_tables(self): - """The biom tables of the analysis - - Returns - ------- - dict - Dictonary in the form {data_type: full BIOM filepath} - """ - fps = [(_id, fp) for _id, fp, ftype in qdb.util.retrieve_filepaths( - "analysis_filepath", "analysis_id", self._id) - if ftype == 'biom'] - - if fps: - fps_ids = [f[0] for f in fps] - with qdb.sql_connection.TRN: - sql = """SELECT filepath_id, data_type FROM qiita.filepath - JOIN qiita.analysis_filepath USING (filepath_id) - JOIN 
qiita.data_type USING (data_type_id) - WHERE filepath_id IN %s""" - qdb.sql_connection.TRN.add(sql, [tuple(fps_ids)]) - data_types = dict(qdb.sql_connection.TRN.execute_fetchindex()) - - return {data_types[_id]: f for _id, f in fps} - else: - return {} + qdb.sql_connection.TRN.add(sql, [self.id]) + return [qdb.artifact.Artifact(aid) + for aid in qdb.sql_connection.TRN.execute_fetchflatten()] @property def mapping_file(self): @@ -524,65 +461,21 @@ def tgz(self): else: return None - @property - def step(self): - """Returns the current step of the analysis - - Returns - ------- - str - The current step of the analysis - - Raises - ------ - ValueError - If the step is not set up - """ - with qdb.sql_connection.TRN: - self._lock_check() - sql = """SELECT step FROM qiita.analysis_workflow - WHERE analysis_id = %s""" - qdb.sql_connection.TRN.add(sql, [self._id]) - try: - return qdb.sql_connection.TRN.execute_fetchlast() - except IndexError: - raise ValueError("Step not set yet!") - - @step.setter - def step(self, value): - with qdb.sql_connection.TRN: - self._lock_check() - sql = """SELECT EXISTS( - SELECT analysis_id - FROM qiita.analysis_workflow - WHERE analysis_id = %s)""" - qdb.sql_connection.TRN.add(sql, [self._id]) - step_exists = qdb.sql_connection.TRN.execute_fetchlast() - - if step_exists: - sql = """UPDATE qiita.analysis_workflow SET step = %s - WHERE analysis_id = %s""" - else: - sql = """INSERT INTO qiita.analysis_workflow - (step, analysis_id) - VALUES (%s, %s)""" - qdb.sql_connection.TRN.add(sql, [value, self._id]) - qdb.sql_connection.TRN.execute() - @property def jobs(self): - """A list of jobs included in the analysis + """The jobs generating the initial artifacts for the analysis Returns ------- - list of qiita_db.job.Job + list of qiita_db.processing_job.Processing_job Job ids for jobs in analysis. Empty list if no jobs attached. """ with qdb.sql_connection.TRN: - sql = """SELECT job_id FROM qiita.analysis_job - WHERE analysis_id = %s""".format(self._table) + sql = """SELECT processing_job_id + FROM qiita.analysis_processing_job + WHERE analysis_id = %s""" qdb.sql_connection.TRN.add(sql, [self._id]) - return [qdb.job.Job(jid) + return [qdb.processing_job.ProcessingJob(jid) for jid in qdb.sql_connection.TRN.execute_fetchflatten()] @property @@ -619,22 +512,66 @@ def pmid(self, pmid): An analysis should only ever have one PMID attached to it. 
""" with qdb.sql_connection.TRN: - self._lock_check() sql = """UPDATE qiita.{0} SET pmid = %s WHERE analysis_id = %s""".format(self._table) qdb.sql_connection.TRN.add(sql, [pmid, self._id]) qdb.sql_connection.TRN.execute() - # @property - # def parent(self): - # """Returns the id of the parent analysis this was forked from""" - # return QiitaDBNotImplementedError() + @property + def can_be_publicized(self): + """Returns whether the analysis can be made public + + Returns + ------- + bool + Whether the analysis can be publicized or not + """ + # The analysis can be made public if all the artifacts used + # to get the samples from are public + with qdb.sql_connection.TRN: + sql = """SELECT DISTINCT artifact_id + FROM qiita.analysis_sample + WHERE analysis_id = %s""" + qdb.sql_connection.TRN.add(sql, [self.id]) + return all( + [qdb.artifact.Artifact(aid).visibility == 'public' + for aid in qdb.sql_connection.TRN.execute_fetchflatten()]) + + def add_artifact(self, artifact): + """Adds an artifact to the analysis + + Parameters + ---------- + artifact : qiita_db.artifact.Artifact + The artifact to be added + """ + with qdb.sql_connection.TRN: + sql = """INSERT INTO qiita.analysis_artifact + (analysis_id, artifact_id) + SELECT %s, %s + WHERE NOT EXISTS(SELECT * + FROM qiita.analysis_artifact + WHERE analysis_id = %s + AND artifact_id = %s)""" + qdb.sql_connection.TRN.add(sql, [self.id, artifact.id, + self.id, artifact.id]) - # @property - # def children(self): - # return QiitaDBNotImplementedError() + def set_error(self, error_msg): + """Sets the analysis error + + Parameters + ---------- + error_msg : str + The error message + """ + with qdb.sql_connection.TRN: + le = qdb.logger.LogEntry.create('Runtime', error_msg) + sql = """UPDATE qiita.analysis + SET logging_id = %s + WHERE analysis_id = %s""" + qdb.sql_connection.TRN.add(sql, [le.id, self.id]) + qdb.sql_connection.TRN.execute() - # ---- Functions ---- def has_access(self, user): """Returns whether the given user has access to the analysis @@ -656,6 +593,24 @@ def has_access(self, user): return self in Analysis.get_by_status('public') | \ user.private_analyses | user.shared_analyses + def can_edit(self, user): + """Returns whether the given user can edit the analysis + + Parameters + ---------- + user : User object + User we are checking edit permissions for + + Returns + ------- + bool + Whether user can edit the study or not + """ + # The analysis is editable only if the user is the owner, is in the + # shared list or the user is an admin + return (user.level in {'superuser', 'admin'} or self.owner == user or + user in self.shared_with) + def summary_data(self): """Return number of studies, artifacts, and samples selected @@ -707,6 +662,21 @@ def unshare(self, user): qdb.sql_connection.TRN.add(sql, [self._id, user.id]) qdb.sql_connection.TRN.execute() + def _lock_samples(self): + """Only dflt analyses can have samples added/removed + + Raises + ------ + qiita_db.exceptions.QiitaDBOperationNotPermittedError + If the analysis is not a default analysis + """ + with qdb.sql_connection.TRN: + sql = "SELECT dflt FROM qiita.analysis WHERE analysis_id = %s" + qdb.sql_connection.TRN.add(sql, [self.id]) + if not qdb.sql_connection.TRN.execute_fetchlast(): + raise qdb.exceptions.QiitaDBOperationNotPermittedError( + "Can't add/remove samples from this analysis") + def add_samples(self, samples): """Adds samples to the analysis @@ -717,7 +687,7 @@ def add_samples(self, samples): {artifact_id: [sample1, sample2, ...], ...} """ with 
qdb.sql_connection.TRN: - self._lock_check() + self._lock_samples() for aid, samps in viewitems(samples): # get previously selected samples for aid and filter them out @@ -755,7 +725,7 @@ def remove_samples(self, artifacts=None, samples=None): artifacts """ with qdb.sql_connection.TRN: - self._lock_check() + self._lock_samples() if artifacts and samples: sql = """DELETE FROM qiita.analysis_sample WHERE analysis_id = %s @@ -781,79 +751,29 @@ def remove_samples(self, artifacts=None, samples=None): qdb.sql_connection.TRN.add(sql, args, many=True) qdb.sql_connection.TRN.execute() - def generate_tgz(self): - with qdb.sql_connection.TRN: - fps_ids = self.all_associated_filepath_ids - if not fps_ids: - raise qdb.exceptions.QiitaDBError( - "The analysis %s do not have files attached, " - "can't create the tgz file" % self.id) - - sql = """SELECT filepath, data_directory_id FROM qiita.filepath - WHERE filepath_id IN %s""" - qdb.sql_connection.TRN.add(sql, [tuple(fps_ids)]) - - full_fps = [join(qdb.util.get_mountpoint_path_by_id(mid), f) - for f, mid in - qdb.sql_connection.TRN.execute_fetchindex()] - - _, analysis_mp = qdb.util.get_mountpoint('analysis')[0] - tgz = join(analysis_mp, '%d_files.tgz' % self.id) - try: - with taropen(tgz, "w:gz") as tar: - for f in full_fps: - tar.add(f, arcname=basename(f)) - error_txt = '' - return_value = 0 - except Exception as e: - error_txt = str(e) - return_value = 1 - - if return_value == 0: - self._add_file(tgz, 'tgz') - - return '', error_txt, return_value - - def build_files(self, - rarefaction_depth=None, - merge_duplicated_sample_ids=False): + def build_files(self, merge_duplicated_sample_ids): """Builds biom and mapping files needed for analysis Parameters ---------- - rarefaction_depth : int, optional - Defaults to ``None``. If ``None``, do not rarefy. Otherwise, rarefy - all samples to this number of observations - merge_duplicated_sample_ids : bool, optional + merge_duplicated_sample_ids : bool If the duplicated sample ids in the selected studies should be - merged or prepended with the artifact ids. False (default) prepends + merged or prepended with the artifact ids. If false prepends the artifact id - Raises - ------ - TypeError - If `rarefaction_depth` is not an integer - ValueError - If `rarefaction_depth` is less than or equal to zero - Notes ----- Creates biom tables for each requested data type Creates mapping file for requested samples """ with qdb.sql_connection.TRN: - if rarefaction_depth is not None: - if type(rarefaction_depth) is not int: - raise TypeError("rarefaction_depth must be in integer") - if rarefaction_depth <= 0: - raise ValueError( - "rarefaction_depth must be greater than 0") - # in practice we could retrieve samples in each of the following # calls but this will mean calling the DB multiple times and will # make testing much harder as we will need to have analyses at # different stages and possible errors. 
samples = self.samples + # getting the info of all the artifacts to save SQL time + bioms_info = qdb.util.get_artifacts_information(samples.keys()) # figuring out if we are going to have duplicated samples, again # doing it here cause it's computational cheaper @@ -863,20 +783,29 @@ def build_files(self, # are going to create rename_dup_samples = False grouped_samples = {} - for k, v in viewitems(samples): - a = qdb.artifact.Artifact(k) - p = a.processing_parameters - if p is not None and p.command is not None: - ref = (str(p.values['reference']) - if 'reference' in p.values else 'na') - cid = str(p.command.id) + for aid, asamples in viewitems(samples): + # find the artifact info, [0] there should be only 1 info + ainfo = [bi for bi in bioms_info + if bi['artifact_id'] == aid][0] + + data_type = ainfo['data_type'] + # algorithm is: processing_method | parent_processing, just + # keeping processing_method + algorithm = ainfo['algorithm'].split('|')[0].strip() + files = ainfo['files'] + + l = "%s || %s" % (data_type, algorithm) + # deblur special case, we need to account for file name + if 'deblur-workflow' in algorithm: + # [0] there is always just one biom + l += " || %s" % [f for f in files + if f.endswith('.biom')][0] else: - ref = 'na' - cid = 'na' - l = "%s.%s.%s" % (a.data_type, ref, cid) + l += " ||" + if l not in grouped_samples: grouped_samples[l] = [] - grouped_samples[l].append((k, v)) + grouped_samples[l].append((aid, asamples)) # 2. if rename_dup_samples is still False, make sure that we don't # need to rename samples by checking that there are not # duplicated samples per group @@ -897,18 +826,21 @@ def build_files(self, dup_samples = dup_samples | s self._build_mapping_file(samples, rename_dup_samples) - self._build_biom_tables(grouped_samples, rarefaction_depth, - rename_dup_samples) + biom_files = self._build_biom_tables( + grouped_samples, rename_dup_samples) - def _build_biom_tables(self, grouped_samples, rarefaction_depth=None, - rename_dup_samples=False): + return biom_files + + def _build_biom_tables(self, grouped_samples, rename_dup_samples=False): """Build tables and add them to the analysis""" with qdb.sql_connection.TRN: base_fp = qdb.util.get_work_base_dir() - _, base_fp = qdb.util.get_mountpoint(self._table)[0] + biom_files = [] for label, tables in viewitems(grouped_samples): - data_type, reference_id, command_id = label.split('.') + data_type, algorithm, files = [ + l.strip() for l in label.split('||')] + new_table = None artifact_ids = [] for aid, samples in tables: @@ -955,31 +887,19 @@ def _build_biom_tables(self, grouped_samples, rarefaction_depth=None, raise RuntimeError("All samples filtered out from " "analysis due to rarefaction level") - # add the metadata column for study the samples come from, - # this is useful in case the user download the bioms - study_md = {'study': artifact.study.title, - 'artifact_ids': ', '.join(artifact_ids), - 'reference_id': reference_id, - 'command_id': command_id} - samples_md = {sid: study_md for sid in new_table.ids()} - new_table.add_metadata(samples_md, axis='sample') - - if rarefaction_depth is not None: - new_table = new_table.subsample(rarefaction_depth) - if len(new_table.ids()) == 0: - raise RuntimeError( - "All samples filtered out due to rarefacion level") - # write out the file - fn = "%d_analysis_dt-%s_r-%s_c-%s.biom" % ( - self._id, data_type, reference_id, command_id) + data_type = sub('[^0-9a-zA-Z]+', '', data_type) + algorithm = sub('[^0-9a-zA-Z]+', '', algorithm) + files = sub('[^0-9a-zA-Z]+', '', files) + 
info = "%s_%s_%s" % (data_type, algorithm, files) + fn = "%d_analysis_%s.biom" % (self._id, info) biom_fp = join(base_fp, fn) with biom_open(biom_fp, 'w') as f: new_table.to_hdf5( - f, "Generated by Qiita. Analysis %d Datatype %s " - "Reference %s Command %s" % (self._id, data_type, - reference_id, command_id)) - self._add_file(fn, "biom", data_type=data_type) + f, "Generated by Qiita, analysis id: %d, info: %s" % ( + self._id, label)) + biom_files.append((data_type, biom_fp)) + return biom_files def _build_mapping_file(self, samples, rename_dup_samples=False): """Builds the combined mapping file for all samples @@ -1067,256 +987,3 @@ def _add_file(self, filename, filetype, data_type=None): VALUES (%s, %s{1})""".format(col, dtid) qdb.sql_connection.TRN.add(sql, [self._id, fpid]) qdb.sql_connection.TRN.execute() - - -class Collection(qdb.base.QiitaStatusObject): - """ - Analysis overview object to track a multi-analysis collection. - - Attributes - ---------- - name: str - Name of the Collection - description: str - Description of what the collection is investigating - owner: User object - Owner of the Collection - analyses: list of Analysis Objects - all analyses that are part of the collection - highlights : list of Job objects - Important job results related to the collection - - Methods - ------- - add_analysis - remove_analysis - highlight_job - remove_highlight - share - unshare - """ - _table = "collection" - _analysis_table = "collection_analysis" - _highlight_table = "collection_job" - _share_table = "collection_users" - - def _status_setter_checks(self): - r"""Perform a check to make sure not setting status away from public - """ - if self.check_status(("public", )): - raise qdb.exceptions.QiitaDBStatusError( - "Illegal operation on public collection!") - - @classmethod - def create(cls, owner, name, description=None): - """Creates a new collection on the database - - Parameters - ---------- - owner : User object - Owner of the collection - name : str - Name of the collection - description : str, optional - Brief description of the collecton's overarching goal - """ - with qdb.sql_connection.TRN: - sql = """INSERT INTO qiita.{0} (email, name, description) - VALUES (%s, %s, %s) - RETURNING collection_id""".format(cls._table) - qdb.sql_connection.TRN.add(sql, [owner.id, name, description]) - c_id = qdb.sql_connection.TRN.execute_fetchlast() - - return cls(c_id) - - @classmethod - def delete(cls, id_): - """Deletes a collection from the database - - Parameters - ---------- - id_ : int - ID of the collection to delete - - Raises - ------ - QiitaDBStatusError - Trying to delete a public collection - """ - with qdb.sql_connection.TRN: - if cls(id_).status == "public": - raise qdb.exceptions.QiitaDBStatusError( - "Can't delete public collection!") - - sql = "DELETE FROM qiita.{0} WHERE collection_id = %s" - for table in (cls._analysis_table, cls._highlight_table, - cls._share_table, cls._table): - qdb.sql_connection.TRN.add(sql.format(table), [id_]) - - qdb.sql_connection.TRN.execute() - - # --- Properties --- - @property - def name(self): - with qdb.sql_connection.TRN: - sql = "SELECT name FROM qiita.{0} WHERE collection_id = %s".format( - self._table) - qdb.sql_connection.TRN.add(sql, [self._id]) - return qdb.sql_connection.TRN.execute_fetchlast() - - @name.setter - def name(self, value): - with qdb.sql_connection.TRN: - self._status_setter_checks() - - sql = """UPDATE qiita.{0} SET name = %s - WHERE collection_id = %s""".format(self._table) - qdb.sql_connection.TRN.add(sql, [value, 
self._id]) - qdb.sql_connection.TRN.execute() - - @property - def description(self): - with qdb.sql_connection.TRN: - sql = """SELECT description FROM qiita.{0} - WHERE collection_id = %s""".format(self._table) - qdb.sql_connection.TRN.add(sql, [self._id]) - return qdb.sql_connection.TRN.execute_fetchlast() - - @description.setter - def description(self, value): - with qdb.sql_connection.TRN: - self._status_setter_checks() - - sql = """UPDATE qiita.{0} SET description = %s - WHERE collection_id = %s""".format(self._table) - qdb.sql_connection.TRN.add(sql, [value, self._id]) - qdb.sql_connection.TRN.execute() - - @property - def owner(self): - with qdb.sql_connection.TRN: - sql = """SELECT email FROM qiita.{0} - WHERE collection_id = %s""".format(self._table) - qdb.sql_connection.TRN.add(sql, [self._id]) - return qdb.user.User(qdb.sql_connection.TRN.execute_fetchlast()) - - @property - def analyses(self): - with qdb.sql_connection.TRN: - sql = """SELECT analysis_id FROM qiita.{0} - WHERE collection_id = %s""".format(self._analysis_table) - qdb.sql_connection.TRN.add(sql, [self._id]) - return [Analysis(aid) - for aid in qdb.sql_connection.TRN.execute_fetchflatten()] - - @property - def highlights(self): - with qdb.sql_connection.TRN: - sql = """SELECT job_id FROM qiita.{0} - WHERE collection_id = %s""".format(self._highlight_table) - qdb.sql_connection.TRN.add(sql, [self._id]) - return [qdb.job.Job(jid) - for jid in qdb.sql_connection.TRN.execute_fetchflatten()] - - @property - def shared_with(self): - with qdb.sql_connection.TRN: - sql = """SELECT email FROM qiita.{0} - WHERE collection_id = %s""".format(self._share_table) - qdb.sql_connection.TRN.add(sql, [self._id]) - return [qdb.user.User(uid) - for uid in qdb.sql_connection.TRN.execute_fetchflatten()] - - # --- Functions --- - def add_analysis(self, analysis): - """Adds an analysis to the collection object - - Parameters - ---------- - analysis : Analysis object - """ - with qdb.sql_connection.TRN: - self._status_setter_checks() - - sql = """INSERT INTO qiita.{0} (analysis_id, collection_id) - VALUES (%s, %s)""".format(self._analysis_table) - qdb.sql_connection.TRN.add(sql, [analysis.id, self._id]) - qdb.sql_connection.TRN.execute() - - def remove_analysis(self, analysis): - """Remove an analysis from the collection object - - Parameters - ---------- - analysis : Analysis object - """ - with qdb.sql_connection.TRN: - self._status_setter_checks() - - sql = """DELETE FROM qiita.{0} - WHERE analysis_id = %s - AND collection_id = %s""".format(self._analysis_table) - qdb.sql_connection.TRN.add(sql, [analysis.id, self._id]) - qdb.sql_connection.TRN.execute() - - def highlight_job(self, job): - """Marks a job as important to the collection - - Parameters - ---------- - job : Job object - """ - with qdb.sql_connection.TRN: - self._status_setter_checks() - - sql = """INSERT INTO qiita.{0} (job_id, collection_id) - VALUES (%s, %s)""".format(self._highlight_table) - qdb.sql_connection.TRN.add(sql, [job.id, self._id]) - qdb.sql_connection.TRN.execute() - - def remove_highlight(self, job): - """Removes job importance from the collection - - Parameters - ---------- - job : Job object - """ - with qdb.sql_connection.TRN: - self._status_setter_checks() - - sql = """DELETE FROM qiita.{0} - WHERE job_id = %s - AND collection_id = %s""".format(self._highlight_table) - qdb.sql_connection.TRN.add(sql, [job.id, self._id]) - qdb.sql_connection.TRN.execute() - - def share(self, user): - """Shares the collection with another user - - Parameters - ---------- - 
user : User object - """ - with qdb.sql_connection.TRN: - self._status_setter_checks() - - sql = """INSERT INTO qiita.{0} (email, collection_id) - VALUES (%s, %s)""".format(self._share_table) - qdb.sql_connection.TRN.add(sql, [user.id, self._id]) - qdb.sql_connection.TRN.execute() - - def unshare(self, user): - """Unshares the collection with another user - - Parameters - ---------- - user : User object - """ - with qdb.sql_connection.TRN: - self._status_setter_checks() - - sql = """DELETE FROM qiita.{0} - WHERE email = %s - AND collection_id = %s""".format(self._share_table) - qdb.sql_connection.TRN.add(sql, [user.id, self._id]) - qdb.sql_connection.TRN.execute() diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py index ea16cccc1..1fca42f74 100644 --- a/qiita_db/artifact.py +++ b/qiita_db/artifact.py @@ -10,12 +10,17 @@ from future.utils import viewitems from itertools import chain from datetime import datetime -from os import remove +from os import remove, makedirs +from os.path import isfile, exists, relpath +from shutil import rmtree +from functools import partial import networkx as nx import qiita_db as qdb +from qiita_core.qiita_settings import qiita_config + class Artifact(qdb.base.QiitaObject): r"""Any kind of file (or group of files) stored in the system and its @@ -149,6 +154,25 @@ def create_type(name, description, can_be_submitted_to_ebi, [at_id, qdb.util.convert_to_id(fpt, 'filepath_type'), req] for fpt, req in filepath_types] qdb.sql_connection.TRN.add(sql, sql_args, many=True) + + # When creating a type, it is expected that a new mountpoint is created + # for that type, note that we are going to check if there is an + # extra path for the mountpoint, which is useful for the test + # environment + qc = qiita_config + mp = relpath(qc.working_dir, qc.base_data_dir).replace( + 'working_dir', '') + mp = mp + name if mp != '/' and mp != '' else name + sql = """INSERT INTO qiita.data_directory + (data_type, mountpoint, subdirectory, active) + VALUES (%s, %s, %s, %s)""" + qdb.sql_connection.TRN.add(sql, [name, mp, True, True]) + + # We are interested in the dirpath + dp = qdb.util.get_mountpoint(name)[0][1] + if not exists(dp): + makedirs(dp) + qdb.sql_connection.TRN.execute() @classmethod @@ -208,17 +232,22 @@ def copy(cls, artifact, prep_template): @classmethod def create(cls, filepaths, artifact_type, name=None, prep_template=None, - parents=None, processing_parameters=None, move_files=True): + parents=None, processing_parameters=None, move_files=True, + analysis=None, data_type=None): r"""Creates a new artifact in the system The parameters depend on how the artifact was generated: - If the artifact was uploaded by the user, the parameter - `prep_template` should be provided and the parameters `parents` and - `processing_parameters` should not be provided. + `prep_template` should be provided and the parameters `parents`, + `processing_parameters` and `analysis` should not be provided. - If the artifact was generated by processing one or more artifacts, the parameters `parents` and `processing_parameters` - should be provided and the parameter `prep_template` should not - be provided. + should be provided and the parameters `prep_template` and + `analysis` should not be provided. + - If the artifact is the initial artifact of the analysis, the + parameters `analysis` and `data_type` should be provided and the + parameters `prep_template`, `parents` and `processing_parameters` + should not be provided. 
Parameters ---------- @@ -232,16 +261,25 @@ def create(cls, filepaths, artifact_type, name=None, prep_template=None, prep_template : qiita_db.metadata_template.PrepTemplate, optional If the artifact is being uploaded by the user, the prep template to which the artifact should be linked to. If not provided, - `parents` should be provided. + `parents` or `analysis` should be provided. parents : iterable of qiita_db.artifact.Artifact, optional The list of artifacts from which the new artifact has been - generated. If not provided, `prep_template` should be provided. + generated. If not provided, `prep_template` or `analysis` + should be provided. processing_parameters : qiita_db.software.Parameters, optional The processing parameters used to generate the new artifact from `parents`. It is required if `parents` is provided. It should - not be provided if `prep_template` is provided. + not be provided if `parents` is not provided. move_files : bool, optional If False the files will not be moved but copied + analysis : qiita_db.analysis.Analysis, optional + If the artifact is the initial artifact of an analysis, the analysis + to which the artifact belongs. If not provided, `prep_template` + or `parents` should be provided. + data_type : str + The data_type of the artifact in the `analysis`. It is required if + `analysis` is provided. It should not be provided if `analysis` is + not provided. Returns ------- @@ -271,71 +309,131 @@ def create(cls, filepaths, artifact_type, name=None, prep_template=None, raise qdb.exceptions.QiitaDBArtifactCreationError( "at least one filepath is required.") - # Parents or prep template must be provided, but not both - if parents and prep_template: + # Check that the combination of parameters is correct + counts = (int(bool(parents or processing_parameters)) + + int(prep_template is not None) + + int(bool(analysis or data_type))) + if counts != 1: + # More than one parameter has been provided raise qdb.exceptions.QiitaDBArtifactCreationError( - "parents or prep_template should be provided but not both") - elif not (parents or prep_template): + "One and only one of parents, prep template or analysis must " + "be provided") + elif bool(parents) != bool(processing_parameters): + # When provided, parents and processing parameters both should be + # provided (this is effectively doing an XOR) raise qdb.exceptions.QiitaDBArtifactCreationError( - "at least parents or prep_template must be provided") - elif parents and not processing_parameters: - # If parents is provided, processing parameters should also be - # provided + "When provided, both parents and processing parameters should " + "be provided") + elif bool(analysis) != bool(data_type): + # When provided, analysis and data_type both should be + # provided (this is effectively doing an XOR) raise qdb.exceptions.QiitaDBArtifactCreationError( - "if parents is provided, processing_parameters should also be" - "provided.") - elif prep_template and processing_parameters: - # If prep_template is provided, processing_parameters should not be - # provided - raise qdb.exceptions.QiitaDBArtifactCreationError( - "if prep_template is provided, processing_parameters should " - "not be provided.") + "When provided, both analysis and data_type should " + "be provided") + + # There are three different ways of creating an Artifact, but all of + # them execute a set of common operations. Declare functions to avoid + # code duplication. 
These functions should not be used outside of the + # create function, hence declaring them here + def _common_creation_steps(atype, cmd_id, data_type, cmd_parameters): + gen_timestamp = datetime.now() + visibility_id = qdb.util.convert_to_id("sandbox", "visibility") + atype_id = qdb.util.convert_to_id(atype, "artifact_type") + dtype_id = qdb.util.convert_to_id(data_type, "data_type") + # Create the artifact row in the artifact table + sql = """INSERT INTO qiita.artifact + (generated_timestamp, command_id, data_type_id, + command_parameters, visibility_id, + artifact_type_id, submitted_to_vamps) + VALUES (%s, %s, %s, %s, %s, %s, %s) + RETURNING artifact_id""" + sql_args = [gen_timestamp, cmd_id, dtype_id, + cmd_parameters, visibility_id, atype_id, False] + qdb.sql_connection.TRN.add(sql, sql_args) + a_id = qdb.sql_connection.TRN.execute_fetchlast() + qdb.sql_connection.TRN.execute() - timestamp = datetime.now() + return cls(a_id) - with qdb.sql_connection.TRN: - visibility_id = qdb.util.convert_to_id("sandbox", "visibility") - artifact_type_id = qdb.util.convert_to_id( - artifact_type, "artifact_type") + def _associate_with_study(instance, study_id): + # Associate the artifact with the study + sql = """INSERT INTO qiita.study_artifact + (study_id, artifact_id) + VALUES (%s, %s)""" + sql_args = [study_id, instance.id] + qdb.sql_connection.TRN.add(sql, sql_args) + qdb.sql_connection.TRN.execute() - if parents: - # Check that all parents belong to the same study - studies = {p.study.id for p in parents} - if len(studies) > 1: - raise qdb.exceptions.QiitaDBArtifactCreationError( - "parents from multiple studies provided: %s" - % ', '.join(studies)) - study_id = studies.pop() + def _associate_with_analysis(instance, analysis_id): + # Associate the artifact with the analysis + sql = """INSERT INTO qiita.analysis_artifact + (analysis_id, artifact_id) + VALUES (%s, %s)""" + sql_args = [analysis_id, instance.id] + qdb.sql_connection.TRN.add(sql, sql_args) + qdb.sql_connection.TRN.execute() - # Check that all parents have the same data type + with qdb.sql_connection.TRN: + if parents: dtypes = {p.data_type for p in parents} - if len(dtypes) > 1: + # If an artifact has parents, it can be either from the + # processing pipeline or the analysis pipeline. 
Decide which + # one here + studies = {p.study for p in parents} + analyses = {p.analysis for p in parents} + studies.discard(None) + analyses.discard(None) + studies = {s.id for s in studies} + analyses = {a.id for a in analyses} + + # The first 2 cases should never happen, but it doesn't hurt + # to check them + len_studies = len(studies) + len_analyses = len(analyses) + if len_studies > 0 and len_analyses > 0: + raise qdb.exceptions.QiitaDBArtifactCreationError( + "All the parents from an artifact should be either " + "from the analysis pipeline or all from the processing" + " pipeline") + elif len_studies > 1 or len_analyses > 1: raise qdb.exceptions.QiitaDBArtifactCreationError( - "parents have multiple data types: %s" - % ", ".join(dtypes)) - dtype_id = qdb.util.convert_to_id(dtypes.pop(), "data_type") - - # Create the artifact - sql = """INSERT INTO qiita.artifact - (generated_timestamp, command_id, data_type_id, - command_parameters, visibility_id, - artifact_type_id, submitted_to_vamps) - VALUES (%s, %s, %s, %s, %s, %s, %s) - RETURNING artifact_id""" - sql_args = [timestamp, processing_parameters.command.id, - dtype_id, processing_parameters.dump(), - visibility_id, artifact_type_id, False] - qdb.sql_connection.TRN.add(sql, sql_args) - a_id = qdb.sql_connection.TRN.execute_fetchlast() + "Parents from multiple studies/analyses provided. " + "Analyses: %s. Studies: %s." + % (', '.join(analyses), ', '.join(studies))) + elif len_studies == 1: + # This artifact is part of the processing pipeline + study_id = studies.pop() + # In the processing pipeline, artifacts can have only + # one dtype + if len(dtypes) > 1: + raise qdb.exceptions.QiitaDBArtifactCreationError( + "parents have multiple data types: %s" + % ", ".join(dtypes)) + + instance = _common_creation_steps( + artifact_type, processing_parameters.command.id, + dtypes.pop(), processing_parameters.dump()) + + _associate_with_study(instance, study_id) + else: + # This artifact is part of the analysis pipeline + analysis_id = analyses.pop() + # In the analysis pipeline, artifact parents can have + # more than one data type + data_type = ("Multiomic" + if len(dtypes) > 1 else dtypes.pop()) + instance = _common_creation_steps( + artifact_type, processing_parameters.command.id, + data_type, processing_parameters.dump()) + _associate_with_analysis(instance, analysis_id) # Associate the artifact with its parents sql = """INSERT INTO qiita.parent_artifact (artifact_id, parent_id) VALUES (%s, %s)""" - sql_args = [(a_id, p.id) for p in parents] + sql_args = [(instance.id, p.id) for p in parents] qdb.sql_connection.TRN.add(sql, sql_args, many=True) - instance = cls(a_id) # inheriting visibility visibilities = {a.visibility for a in instance.parents} # set based on the "lowest" visibility @@ -345,42 +443,32 @@ def create(cls, filepaths, artifact_type, name=None, prep_template=None, instance.visibility = 'private' else: instance.visibility = 'public' - else: - dtype_id = qdb.util.convert_to_id(prep_template.data_type(), - "data_type") - # Create the artifact - sql = """INSERT INTO qiita.artifact - (generated_timestamp, visibility_id, - artifact_type_id, data_type_id, - submitted_to_vamps) - VALUES (%s, %s, %s, %s, %s) - RETURNING artifact_id""" - sql_args = [timestamp, visibility_id, artifact_type_id, - dtype_id, False] - qdb.sql_connection.TRN.add(sql, sql_args) - a_id = qdb.sql_connection.TRN.execute_fetchlast() + elif prep_template: + # This artifact is uploaded by the user in the + # processing pipeline + instance = 
_common_creation_steps( + artifact_type, None, prep_template.data_type(), None) # Associate the artifact with the prep template - instance = cls(a_id) prep_template.artifact = instance - study_id = prep_template.study_id - - # Associate the artifact with the study - sql = """INSERT INTO qiita.study_artifact (study_id, artifact_id) - VALUES (%s, %s)""" - sql_args = [study_id, a_id] - qdb.sql_connection.TRN.add(sql, sql_args) + # Associate the artifact with the study + _associate_with_study(instance, prep_template.study_id) + else: + # This artifact is an initial artifact of an analysis + instance = _common_creation_steps( + artifact_type, None, data_type, None) + # Associate the artifact with the analysis + analysis.add_artifact(instance) # Associate the artifact with its filepaths fp_ids = qdb.util.insert_filepaths( - filepaths, a_id, artifact_type, "filepath", + filepaths, instance.id, artifact_type, "filepath", move_files=move_files, copy=(not move_files)) sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) VALUES (%s, %s)""" - sql_args = [[a_id, fp_id] for fp_id in fp_ids] + sql_args = [[instance.id, fp_id] for fp_id in fp_ids] qdb.sql_connection.TRN.add(sql, sql_args, many=True) - qdb.sql_connection.TRN.execute() if name: instance.name = name @@ -505,6 +593,10 @@ def delete(cls, artifact_id): sql = "DELETE FROM qiita.study_artifact WHERE artifact_id = %s" qdb.sql_connection.TRN.add(sql, [artifact_id]) + # Detach the artifact from the analysis_artifact table + sql = "DELETE FROM qiita.analysis_artifact WHERE artifact_id = %s" + qdb.sql_connection.TRN.add(sql, [artifact_id]) + # Delete the row in the artifact table sql = "DELETE FROM qiita.artifact WHERE artifact_id = %s" qdb.sql_connection.TRN.add(sql, [artifact_id]) @@ -873,41 +965,55 @@ def html_summary_fp(self): return res - @html_summary_fp.setter - def html_summary_fp(self, value): + def set_html_summary(self, html_fp, support_dir=None): """Sets the HTML summary of the artifact Parameters ---------- - value : str + html_fp : str Path to the new HTML summary + support_dir : str + Path to the directory containing any support files needed by + the HTML file """ with qdb.sql_connection.TRN: - current = self.html_summary_fp - if current: + if self.html_summary_fp: # Delete the current HTML summary - fp_id = current[0] - fp = current[1] + to_delete_ids = [] + to_delete_fps = [] + for fp_id, fp, fp_type in self.filepaths: + if fp_type in ('html_summary', 'html_summary_dir'): + to_delete_ids.append([fp_id]) + to_delete_fps.append(fp) # From the artifact_filepath table sql = """DELETE FROM qiita.artifact_filepath WHERE filepath_id = %s""" - qdb.sql_connection.TRN.add(sql, [fp_id]) + qdb.sql_connection.TRN.add(sql, to_delete_ids, many=True) # From the filepath table sql = "DELETE FROM qiita.filepath WHERE filepath_id=%s" - qdb.sql_connection.TRN.add(sql, [fp_id]) + qdb.sql_connection.TRN.add(sql, to_delete_ids, many=True) # And from the filesystem only after the transaction is # successfully completed (after commit) - qdb.sql_connection.TRN.add_post_commit_func(remove, fp) + + def path_cleaner(fp): + if isfile(fp): + remove(fp) + else: + rmtree(fp) + qdb.sql_connection.TRN.add_post_commit_func( + partial(map, path_cleaner, to_delete_fps)) # Add the new HTML summary + filepaths = [(html_fp, 'html_summary')] + if support_dir is not None: + filepaths.append((support_dir, 'html_summary_dir')) fp_ids = qdb.util.insert_filepaths( - [(value, 'html_summary')], self.id, self.artifact_type, - "filepath") + filepaths, self.id, 
self.artifact_type, "filepath") sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) VALUES (%s, %s)""" - # We only inserted a single filepath, so using index 0 - qdb.sql_connection.TRN.add(sql, [self.id, fp_ids[0]]) + sql_args = [[self.id, id_] for id_ in fp_ids] + qdb.sql_connection.TRN.add(sql, sql_args, many=True) qdb.sql_connection.TRN.execute() @property @@ -1085,15 +1191,33 @@ def study(self): Returns ------- - qiita_db.study.Study - The study that owns the artifact + qiita_db.study.Study or None + The study that owns the artifact, if any """ with qdb.sql_connection.TRN: sql = """SELECT study_id FROM qiita.study_artifact WHERE artifact_id = %s""" qdb.sql_connection.TRN.add(sql, [self.id]) - return qdb.study.Study(qdb.sql_connection.TRN.execute_fetchlast()) + res = qdb.sql_connection.TRN.execute_fetchindex() + return qdb.study.Study(res[0][0]) if res else None + + @property + def analysis(self): + """The analysis to which the artifact belongs to + + Returns + ------- + qiita_db.analysis.Analysis or None + The analysis that owns the artifact, if any + """ + with qdb.sql_connection.TRN: + sql = """SELECT analysis_id + FROM qiita.analysis_artifact + WHERE artifact_id = %s""" + qdb.sql_connection.TRN.add(sql, [self.id]) + res = qdb.sql_connection.TRN.execute_fetchindex() + return qdb.analysis.Analysis(res[0][0]) if res else None def jobs(self, cmd=None, status=None): """Jobs that used this artifact as input diff --git a/qiita_db/base.py b/qiita_db/base.py index 084333616..b2fadaccc 100644 --- a/qiita_db/base.py +++ b/qiita_db/base.py @@ -14,7 +14,6 @@ :toctree: generated/ QiitaObject - QiitaStatusObject """ # ----------------------------------------------------------------------------- @@ -220,106 +219,3 @@ def __hash__(self): def id(self): r"""The object id on the storage system""" return self._id - - -class QiitaStatusObject(QiitaObject): - r"""Base class for any qiita_db object with a status property - - Attributes - ---------- - status - - Methods - ------- - check_status - _status_setter_checks - """ - - @property - def status(self): - r"""String with the current status of the analysis""" - # Get the DB status of the object - with qdb.sql_connection.TRN: - sql = """SELECT status FROM qiita.{0}_status - WHERE {0}_status_id = ( - SELECT {0}_status_id FROM qiita.{0} - WHERE {0}_id = %s)""".format(self._table) - qdb.sql_connection.TRN.add(sql, [self._id]) - return qdb.sql_connection.TRN.execute_fetchlast() - - def _status_setter_checks(self): - r"""Perform any extra checks that needed to be done before setting the - object status on the database. Should be overwritten by the subclasses - """ - raise qdb.exceptions.QiitaDBNotImplementedError() - - @status.setter - def status(self, status): - r"""Change the status of the analysis - - Parameters - ---------- - status: str - The new object status - """ - with qdb.sql_connection.TRN: - # Perform any extra checks needed before - # we update the status in the DB - self._status_setter_checks() - - # Update the status of the object - sql = """UPDATE qiita.{0} SET {0}_status_id = ( - SELECT {0}_status_id FROM qiita.{0}_status - WHERE status = %s) - WHERE {0}_id = %s""".format(self._table) - qdb.sql_connection.TRN.add(sql, [status, self._id]) - qdb.sql_connection.TRN.execute() - - def check_status(self, status, exclude=False): - r"""Checks status of object. - - Parameters - ---------- - status: iterable - Iterable of statuses to check against. 
- exclude: bool, optional - If True, will check that database status is NOT one of the statuses - passed. Default False. - - Returns - ------- - bool - True if the object status is in the desired set of statuses. False - otherwise. - - Notes - ----- - This assumes the following database setup is in place: For a given - cls._table setting, such as "table", there is a corresponding table - with the name "table_status" holding the status entries allowed. This - table has a column called "status" that holds the values corresponding - to what is passed as status in this function and a column - "table_status_id" corresponding to the column of the same name in - "table". - - Table setup: - foo: foo_status_id ----> foo_status: foo_status_id, status - """ - with qdb.sql_connection.TRN: - # Get all available statuses - sql = "SELECT DISTINCT status FROM qiita.{0}_status".format( - self._table) - qdb.sql_connection.TRN.add(sql) - # We need to access to the results of the last SQL query, - # hence indexing using -1 - avail_status = [ - x[0] for x in qdb.sql_connection.TRN.execute_fetchindex()] - - # Check that all the provided status are valid status - if set(status).difference(avail_status): - raise ValueError("%s are not valid status values" - % set(status).difference(avail_status)) - - # Get the DB status of the object - dbstatus = self.status - return dbstatus not in status if exclude else dbstatus in status diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py index 7a3aa3499..b6981f045 100644 --- a/qiita_db/environment_manager.py +++ b/qiita_db/environment_manager.py @@ -405,4 +405,4 @@ def patch(patches_dir=PATCHES_DIR, verbose=False, test=False): if verbose: print('\t\tApplying python patch %s...' % py_patch_filename) - execfile(py_patch_fp) + execfile(py_patch_fp, {}) diff --git a/qiita_db/handlers/analysis.py b/qiita_db/handlers/analysis.py new file mode 100644 index 000000000..4a311a512 --- /dev/null +++ b/qiita_db/handlers/analysis.py @@ -0,0 +1,69 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. 
+# ----------------------------------------------------------------------------- + +from tornado.web import HTTPError + +import qiita_db as qdb +from .oauth2 import OauthBaseHandler, authenticate_oauth + + +def _get_analysis(a_id): + """Returns the analysis with the given `a_id` if it exists + + Parameters + ---------- + a_id : str + The analysis id + + Returns + ------- + qiita_db.analysis.Analysis + The requested analysis + + Raises + ------ + HTTPError + If the analysis does not exist, with error code 404 + If there is a problem instantiating the analysis, with error code 500 + """ + try: + a_id = int(a_id) + a = qdb.analysis.Analysis(a_id) + except qdb.exceptions.QiitaDBUnknownIDError: + raise HTTPError(404) + except Exception as e: + raise HTTPError(500, 'Error instantiating analysis %s: %s' + % (a_id, str(e))) + return a + + +class APIAnalysisMetadataHandler(OauthBaseHandler): + @authenticate_oauth + def get(self, analysis_id): + """Retrieves the analysis metadata + + Parameters + ---------- + analysis_id : str + The id of the analysis whose information is being retrieved + + Returns + ------- + dict + The contents of the analysis keyed by sample id + """ + with qdb.sql_connection.TRN: + a = _get_analysis(analysis_id) + mf_fp = a.mapping_file + response = None + if mf_fp is not None: + df = qdb.metadata_template.util.load_template_to_dataframe( + mf_fp, index='#SampleID') + response = df.to_dict(orient='index') + + self.write(response) diff --git a/qiita_db/handlers/artifact.py b/qiita_db/handlers/artifact.py index 82732863f..cd99b8da4 100644 --- a/qiita_db/handlers/artifact.py +++ b/qiita_db/handlers/artifact.py @@ -79,6 +79,8 @@ def get(self, artifact_id): """ with qdb.sql_connection.TRN: artifact = _get_artifact(artifact_id) + study = artifact.study + analysis = artifact.analysis response = { 'name': artifact.name, 'timestamp': str(artifact.timestamp), @@ -89,7 +91,8 @@ def get(self, artifact_id): 'can_be_submitted_to_vamps': artifact.can_be_submitted_to_vamps, 'prep_information': [p.id for p in artifact.prep_templates], - 'study': artifact.study.id} + 'study': study.id if study else None, + 'analysis': analysis.id if analysis else None} params = artifact.processing_parameters response['processing_parameters'] = ( params.values if params is not None else None) @@ -128,8 +131,17 @@ def patch(self, artifact_id): raise HTTPError(400, 'Incorrect path parameter value') else: artifact = _get_artifact(artifact_id) + + try: + html_data = loads(req_value) + html_fp = html_data['html'] + html_dir = html_data['dir'] + except ValueError: + html_fp = req_value + html_dir = None + try: - artifact.html_summary_fp = req_value + artifact.set_html_summary(html_fp, html_dir) except Exception as e: raise HTTPError(500, str(e)) else: @@ -166,15 +178,21 @@ def post(self): """ filepaths = loads(self.get_argument('filepaths')) artifact_type = self.get_argument('type') - prep_template = self.get_argument('prep') + prep_template = self.get_argument('prep', None) + analysis = self.get_argument('analysis', None) name = self.get_argument('name', None) + dtype = self.get_argument('data_type', None) - if prep_template: + if prep_template is not None: prep_template = qdb.metadata_template.prep_template.PrepTemplate( prep_template) + dtype = None + if analysis is not None: + analysis = qdb.analysis.Analysis(analysis) a = qdb.artifact.Artifact.create( - filepaths, artifact_type, name=name, prep_template=prep_template) + filepaths, artifact_type, name=name, prep_template=prep_template, + analysis=analysis, 
data_type=dtype) self.write({'artifact': a.id}) diff --git a/qiita_db/handlers/plugin.py b/qiita_db/handlers/plugin.py index 5850df51c..df4e1e2ff 100644 --- a/qiita_db/handlers/plugin.py +++ b/qiita_db/handlers/plugin.py @@ -100,16 +100,23 @@ def post(self, name, version): cmd_desc = self.get_argument('description') req_params = loads(self.get_argument('required_parameters')) opt_params = loads(self.get_argument('optional_parameters')) + + for p_name, (p_type, dflt) in opt_params.items(): + if p_type.startswith('mchoice'): + opt_params[p_name] = [p_type, loads(dflt)] + outputs = self.get_argument('outputs', None) if outputs: outputs = loads(outputs) dflt_param_set = loads(self.get_argument('default_parameter_sets')) + analysis_only = self.get_argument('analysis_only', False) parameters = req_params parameters.update(opt_params) cmd = qdb.software.Command.create( - plugin, cmd_name, cmd_desc, parameters, outputs) + plugin, cmd_name, cmd_desc, parameters, outputs, + analysis_only=analysis_only) if dflt_param_set is not None: for name, vals in dflt_param_set.items(): @@ -221,4 +228,6 @@ def post(self): for fp in conf_files: s = qdb.software.Software.from_file(fp, update=True) s.activate() + s.register_commands() + self.finish() diff --git a/qiita_db/handlers/processing_job.py b/qiita_db/handlers/processing_job.py index efd5a94bd..84efc21af 100644 --- a/qiita_db/handlers/processing_job.py +++ b/qiita_db/handlers/processing_job.py @@ -11,7 +11,6 @@ from tornado.web import HTTPError -from qiita_core.qiita_settings import qiita_config import qiita_db as qdb from .oauth2 import OauthBaseHandler, authenticate_oauth @@ -59,13 +58,10 @@ def _job_completer(job_id, payload): completing the job """ import qiita_db as qdb - cmd = "%s '%s' %s %s '%s'" % (qiita_config.private_launcher, - qiita_config.qiita_env, 'complete_job', - job_id, payload) - std_out, std_err, return_value = qdb.processing_job._system_call(cmd) - if return_value != 0: - error = ("Can't submit private task 'complete job:\n" - "Std output:%s\nStd error:%s'" % (std_out, std_err)) + + success, error = qdb.processing_job.private_job_submitter( + "Complete job %s" % job_id, 'complete_job', [job_id, payload]) + if not success: job = qdb.processing_job.ProcessingJob(job_id) job.complete(False, error=error) diff --git a/qiita_db/handlers/tests/test_analysis.py b/qiita_db/handlers/tests/test_analysis.py new file mode 100644 index 000000000..2c20c64a3 --- /dev/null +++ b/qiita_db/handlers/tests/test_analysis.py @@ -0,0 +1,106 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. 
+# ----------------------------------------------------------------------------- + +from unittest import main, TestCase +from json import loads + +from tornado.web import HTTPError + +from qiita_db.handlers.tests.oauthbase import OauthTestingBase +from qiita_db.handlers.analysis import _get_analysis +import qiita_db as qdb + + +class UtilTests(TestCase): + def test_get_analysis(self): + obs = _get_analysis(1) + exp = qdb.analysis.Analysis(1) + self.assertEqual(obs, exp) + + # It doesn't exist + with self.assertRaises(HTTPError): + _get_analysis(100) + + +class APIAnalysisMetadataHandlerTests(OauthTestingBase): + def test_get_does_not_exist(self): + obs = self.get('/qiita_db/analysis/100/metadata/', headers=self.header) + self.assertEqual(obs.code, 404) + + def test_get_no_header(self): + obs = self.get('/qiita_db/analysis/1/metadata/') + self.assertEqual(obs.code, 400) + + def test_get(self): + obs = self.get('/qiita_db/analysis/1/metadata/', headers=self.header) + self.assertEqual(obs.code, 200) + + obs = loads(obs.body) + exp = ['1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184', + '1.SKM9.640192', '1.SKB7.640196'] + self.assertItemsEqual(obs, exp) + + exp = {'platform': 'Illumina', 'longitude': '95.5088566087', + 'experiment_center': 'ANL', 'center_name': 'ANL', + 'run_center': 'ANL', 'run_prefix': 's_G1_L001_sequences', + 'sample_type': 'ENVO:soil', + 'common_name': 'rhizosphere metagenome', 'samp_size': '.25,g', + 'has_extracted_data': 'True', 'water_content_soil': '0.101', + 'target_gene': '16S rRNA', + 'env_feature': 'ENVO:plant-associated habitat', + 'sequencing_meth': 'Sequencing by synthesis', + 'Description': 'Cannabis Soil Microbiome', 'run_date': '8/1/12', + 'qiita_owner': 'Dude', 'altitude': '0.0', + 'BarcodeSequence': 'TCGACCAAACAC', + 'env_biome': 'ENVO:Temperate grasslands, savannas, and ' + 'shrubland biome', + 'texture': '63.1 sand, 17.7 silt, 19.2 clay', + 'pcr_primers': 'FWD:GTGCCAGCMGCCGCGGTAA; ' + 'REV:GGACTACHVGGGTWTCTAAT', + 'experiment_title': 'Cannabis Soil Microbiome', + 'library_construction_protocol': + 'This analysis was done as in Caporaso et al 2011 Genome ' + 'research. The PCR primers (F515/R806) were developed ' + 'against the V4 region of the 16S rRNA (both bacteria and ' + 'archaea), which we determined would yield optimal ' + 'community clustering with reads of this length using a ' + 'procedure similar to that of ref. 15. [For reference, ' + 'this primer pair amplifies the region 533_786 in the ' + 'Escherichia coli strain 83972 sequence (greengenes ' + 'accession no. prokMSA_id:470367).] 
The reverse PCR primer ' + 'is barcoded with a 12-base error-correcting Golay code to ' + 'facilitate multiplexing of up to 1,500 samples per lane, ' + 'and both PCR primers contain sequencer adapter regions.', + 'experiment_design_description': + 'micro biome of soil and rhizosphere of cannabis plants ' + 'from CA', + 'study_center': 'CCME', 'physical_location': 'ANL', + 'qiita_prep_id': '1', 'taxon_id': '939928', + 'has_physical_specimen': 'True', 'ph': '6.82', + 'description_duplicate': 'Bucu Rhizo', + 'qiita_study_alias': 'Cannabis Soils', 'sample_center': 'ANL', + 'elevation': '114.0', 'illumina_technology': 'MiSeq', + 'assigned_from_geo': 'n', + 'collection_timestamp': '2011-11-11 13:00:00', + 'latitude': '31.7167821863', + 'LinkerPrimerSequence': 'GTGCCAGCMGCCGCGGTAA', + 'qiita_principal_investigator': 'PIDude', 'host_taxid': '3483', + 'samp_salinity': '7.44', 'host_subject_id': '1001:D2', + 'target_subfragment': 'V4', 'season_environment': 'winter', + 'temp': '15.0', 'emp_status': 'EMP', + 'country': 'GAZ:United States of America', + 'instrument_model': 'Illumina MiSeq', + 'qiita_study_title': 'Identification of the Microbiomes for ' + 'Cannabis Soils', + 'tot_nitro': '1.3', 'depth': '0.15', + 'anonymized_name': 'SKM4', 'tot_org_carb': '3.31'} + self.assertEqual(obs['1.SKM4.640180'], exp) + + +if __name__ == '__main__': + main() diff --git a/qiita_db/handlers/tests/test_artifact.py b/qiita_db/handlers/tests/test_artifact.py index dd64464c9..df3efc132 100644 --- a/qiita_db/handlers/tests/test_artifact.py +++ b/qiita_db/handlers/tests/test_artifact.py @@ -9,13 +9,16 @@ from unittest import main, TestCase from json import loads from functools import partial -from os.path import join, exists +from os.path import join, exists, isfile from os import close, remove -from tempfile import mkstemp +from shutil import rmtree +from tempfile import mkstemp, mkdtemp from json import dumps from tornado.web import HTTPError import pandas as pd +from biom import example_table as et +from biom.util import biom_open from qiita_db.handlers.tests.oauthbase import OauthTestingBase import qiita_db as qdb @@ -37,15 +40,16 @@ class ArtifactHandlerTests(OauthTestingBase): def setUp(self): super(ArtifactHandlerTests, self).setUp() - fd, self.html_fp = mkstemp(suffix=".html") - close(fd) - self._clean_up_files = [self.html_fp] + self._clean_up_files = [] def tearDown(self): super(ArtifactHandlerTests, self).tearDown() for fp in self._clean_up_files: if exists(fp): - remove(fp) + if isfile(fp): + remove(fp) + else: + rmtree(fp) def test_get_artifact_does_not_exist(self): obs = self.get('/qiita_db/artifacts/100/', headers=self.header) @@ -77,23 +81,71 @@ def test_get_artifact(self): 'is_submitted_to_vamps': None, 'prep_information': [1], 'study': 1, + 'analysis': None, 'processing_parameters': None, 'files': exp_fps} self.assertEqual(loads(obs.body), exp) + obs = self.get('/qiita_db/artifacts/9/', headers=self.header) + self.assertEqual(obs.code, 200) + db_test_raw_dir = qdb.util.get_mountpoint('analysis')[0][1] + path_builder = partial(join, db_test_raw_dir) + exp_fps = {"biom": [path_builder('1_analysis_18S.biom')]} + exp = { + 'name': 'noname', + 'visibility': 'sandbox', + 'type': 'BIOM', + 'data_type': '18S', + 'can_be_submitted_to_ebi': False, + 'ebi_run_accessions': None, + 'can_be_submitted_to_vamps': False, + 'is_submitted_to_vamps': None, + 'prep_information': [], + 'study': None, + 'analysis': 1, + 'processing_parameters': {'biom_table': 8, 'depth': 9000, + 'subsample_multinomial': False}, + 'files': 
exp_fps} + obs = loads(obs.body) + # The timestamp is genreated at patch time, so we can't check for it + del obs['timestamp'] + self.assertEqual(obs, exp) + def test_patch(self): + fd, html_fp = mkstemp(suffix=".html") + close(fd) + self._clean_up_files.append(html_fp) + # correct argument with a single HTML arguments = {'op': 'add', 'path': '/html_summary/', - 'value': self.html_fp} - self.assertIsNone(qdb.artifact.Artifact(1).html_summary_fp) + 'value': html_fp} + artifact = qdb.artifact.Artifact(1) + self.assertIsNone(artifact.html_summary_fp) obs = self.patch('/qiita_db/artifacts/1/', headers=self.header, data=arguments) self.assertEqual(obs.code, 200) - self.assertIsNotNone(qdb.artifact.Artifact(1).html_summary_fp) + self.assertIsNotNone(artifact.html_summary_fp) + + # Correct argument with an HMTL and a directory + fd, html_fp = mkstemp(suffix=".html") + close(fd) + self._clean_up_files.append(html_fp) + html_dir = mkdtemp() + self._clean_up_files.append(html_dir) + arguments = {'op': 'add', 'path': '/html_summary/', + 'value': dumps({'html': html_fp, 'dir': html_dir})} + obs = self.patch('/qiita_db/artifacts/1/', + headers=self.header, + data=arguments) + self.assertEqual(obs.code, 200) + self.assertIsNotNone(artifact.html_summary_fp) + html_dir = [fp for _, fp, fp_type in artifact.filepaths + if fp_type == 'html_summary_dir'] + self.assertEqual(len(html_dir), 1) # Wrong operation arguments = {'op': 'wrong', 'path': '/html_summary/', - 'value': self.html_fp} + 'value': html_fp} obs = self.patch('/qiita_db/artifacts/1/', headers=self.header, data=arguments) @@ -103,7 +155,7 @@ def test_patch(self): # Wrong path parameter arguments = {'op': 'add', 'path': '/wrong/', - 'value': self.html_fp} + 'value': html_fp} obs = self.patch('/qiita_db/artifacts/1/', headers=self.header, data=arguments) @@ -112,7 +164,7 @@ def test_patch(self): # Wrong value parameter arguments = {'op': 'add', 'path': '/html_summary/', - 'value': self.html_fp} + 'value': html_fp} obs = self.patch('/qiita_db/artifacts/1/', headers=self.header, data=arguments) @@ -180,6 +232,27 @@ def test_post(self): self._clean_up_files.extend([fp for _, fp, _ in a.filepaths]) self.assertEqual(a.name, "New test artifact") + def test_post_analysis(self): + fd, fp = mkstemp(suffix='_table.biom') + close(fd) + with biom_open(fp, 'w') as f: + et.to_hdf5(f, "test") + self._clean_up_files.append(fp) + + data = {'filepaths': dumps([(fp, 'biom')]), + 'type': "BIOM", + 'name': "New biom artifact", + 'analysis': 1, + 'data_type': '16S'} + obs = self.post('/apitest/artifact/', headers=self.header, data=data) + self.assertEqual(obs.code, 200) + obs = loads(obs.body) + self.assertEqual(obs.keys(), ['artifact']) + + a = qdb.artifact.Artifact(obs['artifact']) + self._clean_up_files.extend([afp for _, afp, _ in a.filepaths]) + self.assertEqual(a.name, "New biom artifact") + def test_post_error(self): data = {'filepaths': dumps([('Do not exist', 'raw_forward_seqs')]), 'type': "FASTQ", diff --git a/qiita_db/handlers/tests/test_plugin.py b/qiita_db/handlers/tests/test_plugin.py index 036a58f32..0fb108377 100644 --- a/qiita_db/handlers/tests/test_plugin.py +++ b/qiita_db/handlers/tests/test_plugin.py @@ -56,7 +56,9 @@ def test_get(self): 'for performing microbiome analysis from raw DNA ' 'sequencing data', 'commands': ['Split libraries FASTQ', 'Split libraries', - 'Pick closed-reference OTUs'], + 'Pick closed-reference OTUs', 'Summarize Taxa', + 'Beta Diversity', 'Alpha Rarefaction', + 'Single Rarefaction'], 'publications': [{'DOI': 
'10.1038/nmeth.f.303', 'PubMed': '20383131'}], 'default_workflows': ['FASTQ upstream workflow', @@ -74,9 +76,12 @@ def test_post(self): 'description': 'Command added for testing', 'required_parameters': dumps( {'in_data': ['artifact:["FASTA"]', None]}), - 'optional_parameters': dumps({'param1': ['string', ''], - 'param2': ['float', '1.5'], - 'param3': ['boolean', 'True']}), + 'optional_parameters': dumps( + {'param1': ['string', ''], + 'param2': ['float', '1.5'], + 'param3': ['boolean', 'True'], + 'param4': ['mchoice:["opt1", "opt2", "opt3"]', + dumps(['opt1', 'opt2'])]}), 'outputs': dumps({'out1': 'BIOM'}), 'default_parameter_sets': dumps( {'dflt1': {'param1': 'test', @@ -88,6 +93,25 @@ def test_post(self): self.assertEqual(obs.code, 200) obs = _get_command('QIIME', '1.9.1', 'New Command') self.assertEqual(obs.name, 'New Command') + self.assertFalse(obs.analysis_only) + + # Create a new command that is analysis only + data = { + 'name': 'New analysis command', + 'description': 'Analysis command added for testing', + 'required_parameters': dumps( + {'in_data': ['artifact:["BIOM"]', None]}), + 'optional_parameters': dumps({'param1': ['string', 'default']}), + 'outputs': dumps({'outtable': 'BIOM'}), + 'default_parameter_sets': dumps({'dflt1': {'param1': 'test'}}), + 'analysis_only': True + } + obs = self.post('/qiita_db/plugins/QIIME/1.9.1/commands/', data=data, + headers=self.header) + self.assertEqual(obs.code, 200) + obs = _get_command('QIIME', '1.9.1', 'New analysis command') + self.assertEqual(obs.name, 'New analysis command') + self.assertTrue(obs.analysis_only) class CommandHandlerTests(OauthTestingBase): diff --git a/qiita_db/investigation.py b/qiita_db/investigation.py index 9f04ada0f..a2c07b1c2 100644 --- a/qiita_db/investigation.py +++ b/qiita_db/investigation.py @@ -23,7 +23,7 @@ REQUIRED_KEYS = {"name", "description", "contact_person"} -class Investigation(qdb.base.QiitaStatusObject): +class Investigation(qdb.base.QiitaObject): """ Study object to access to the Qiita Study information diff --git a/qiita_db/job.py b/qiita_db/job.py deleted file mode 100644 index 2cb016717..000000000 --- a/qiita_db/job.py +++ /dev/null @@ -1,611 +0,0 @@ -r""" -Data objects (:mod: `qiita_db.data`) -==================================== - -..currentmodule:: qiita_db.data - -This module provides functionality for creating, running, and storing results -of jobs in an analysis. It also provides the ability to query what commmands -are available for jobs, as well as the options for these commands. - -Classes -------- - -..autosummary:: - :toctree: generated/ - - Job - Command -""" -# ----------------------------------------------------------------------------- -# Copyright (c) 2014--, The Qiita Development Team. -# -# Distributed under the terms of the BSD 3-clause License. -# -# The full license is in the file LICENSE, distributed with this software. 
-# ----------------------------------------------------------------------------- -from __future__ import division -from json import loads -from os.path import join, relpath -from glob import glob -from functools import partial -from collections import defaultdict - -import qiita_db as qdb - - -class Job(qdb.base.QiitaStatusObject): - """ - Job object to access to the Qiita Job information - - Attributes - ---------- - datatype - command - options - results - error - - Methods - ------- - set_error - add_results - """ - _table = "job" - - def _lock_job(self): - """Raises QiitaDBStatusError if study is public""" - if self.check_status(("completed", "error")): - raise qdb.exceptions.QiitaDBStatusError( - "Can't change status of finished job!") - - def _status_setter_checks(self): - r"""Perform a check to make sure not setting status away from completed - or errored - """ - self._lock_job() - - @staticmethod - def get_commands(): - """returns commands available with the options as well - - Returns - ------- - list of command objects - """ - return Command.create_list() - - @classmethod - def exists(cls, datatype, command, options, analysis, - input_file_reference, input_file_software_command, - return_existing=False): - """Checks if the given job already exists - - Parameters - ---------- - datatype : str - Datatype the job is operating on - command : str - The name of the command run on the data - options : dict - Options for the command in the format {option: value} - analysis : Analysis object - The analysis the job will be attached to on creation - input_file_reference : Reference object - The reference object used to create the input file - input_file_software_command: Software.Command object - The software command object used to create the input file - return_existing : bool, optional - If True, function will return the instatiated Job object for the - matching job. 
Default False - - Returns - ------- - bool - Whether the job exists or not - Job or None, optional - If return_existing is True, the Job object of the matching job or - None if none exists - """ - with qdb.sql_connection.TRN: - # check passed arguments and grab analyses for matching jobs - datatype_id = qdb.util.convert_to_id(datatype, "data_type") - sql = "SELECT command_id FROM qiita.command WHERE name = %s" - qdb.sql_connection.TRN.add(sql, [command]) - command_id = qdb.sql_connection.TRN.execute_fetchlast() - - opts_json = qdb.util.params_dict_to_json(options) - sql = """SELECT DISTINCT analysis_id, job_id - FROM qiita.analysis_job - JOIN qiita.{0} USING (job_id) - WHERE data_type_id = %s - AND command_id = %s - AND options = %s - AND input_file_reference_id = %s - AND input_file_software_command_id = %s - """.format(cls._table) - rid = (input_file_reference.id - if input_file_reference is not None else None) - cid = (input_file_software_command.id - if input_file_software_command is not None else None) - qdb.sql_connection.TRN.add( - sql, [datatype_id, command_id, opts_json, rid, cid]) - analyses = qdb.sql_connection.TRN.execute_fetchindex() - - if not analyses and return_existing: - # stop looking since we have no possible matches - return False, None - elif not analyses: - return False - - # build the samples dict as list of samples keyed to - # their artifact_id - sql = """SELECT artifact_id, array_agg( - sample_id ORDER BY sample_id) - FROM qiita.analysis_sample - WHERE analysis_id = %s GROUP BY artifact_id""" - qdb.sql_connection.TRN.add(sql, [analysis.id]) - samples = dict(qdb.sql_connection.TRN.execute_fetchindex()) - - # check passed analyses' samples dict against all found analyses - matched_job = None - for aid, jid in analyses: - # build the samples dict for a found analysis - qdb.sql_connection.TRN.add(sql, [aid]) - comp_samples = dict( - qdb.sql_connection.TRN.execute_fetchindex()) - - # compare samples and stop checking if a match is found - matched_samples = samples == comp_samples - if matched_samples: - matched_job = jid - break - - if return_existing: - return matched_samples, (cls(matched_job) if matched_job - else None) - - return matched_samples - - @classmethod - def delete(cls, jobid): - """Removes a job and all files attached to it - - Parameters - ---------- - jobid : int - ID of the job to delete - - Notes - ----- - This function will remove a job from all analyses it is attached to in - analysis_job table, as well as the job itself from the job table. All - files and references to files for the job will be removed from the - filepath and job_results_filepath tables. All the job's files on the - filesystem will also be removed. 
- """ - with qdb.sql_connection.TRN: - # store filepath info for later use - sql = """SELECT filepath, filepath_id - FROM qiita.filepath - JOIN qiita.job_results_filepath USING (filepath_id) - WHERE job_id = %s""" - args = [jobid] - qdb.sql_connection.TRN.add(sql, args) - filepaths = qdb.sql_connection.TRN.execute_fetchindex() - - # remove fiepath links in DB - sql = "DELETE FROM qiita.job_results_filepath WHERE job_id = %s" - qdb.sql_connection.TRN.add(sql, args) - - sql = "DELETE FROM qiita.filepath WHERE filepath_id IN %s" - qdb.sql_connection.TRN.add(sql, [tuple(fp[1] for fp in filepaths)]) - - # remove job - sql = "DELETE FROM qiita.analysis_job WHERE job_id = %s" - qdb.sql_connection.TRN.add(sql, args) - sql = "DELETE FROM qiita.collection_job WHERE job_id = %s" - qdb.sql_connection.TRN.add(sql, args) - sql = "DELETE FROM qiita.job WHERE job_id = %s" - qdb.sql_connection.TRN.add(sql, args) - - qdb.sql_connection.TRN.execute() - - @classmethod - def create(cls, datatype, command, options, analysis, - input_file_reference, input_file_software_command, - return_existing=False): - """Creates a new job on the database - - Parameters - ---------- - datatype : str - The datatype in which this job applies - command : str - The name of the command executed in this job - analysis : Analysis object - The analysis which this job belongs to - input_file_reference : Reference object - The reference object used to create the input file - input_file_software_command: Software.Command object - The software command object used to create the input file - return_existing : bool, optional - If True, returns an instantiated Job object pointing to an already - existing job with the given parameters. Default False - - Returns - ------- - Job object - The newly created job - - Raises - ------ - QiitaDBDuplicateError - return_existing is False and an exact duplicate of the job already - exists in the DB. 
- """ - with qdb.sql_connection.TRN: - analysis_sql = """INSERT INTO qiita.analysis_job - (analysis_id, job_id) VALUES (%s, %s)""" - exists, job = cls.exists(datatype, command, options, analysis, - input_file_reference, - input_file_software_command, - return_existing=True) - - if exists: - if return_existing: - # add job to analysis - qdb.sql_connection.TRN.add( - analysis_sql, [analysis.id, job.id]) - qdb.sql_connection.TRN.execute() - return job - else: - raise qdb.exceptions.QiitaDBDuplicateError( - "Job", "datatype: %s, command: %s, options: %s, " - "analysis: %s" - % (datatype, command, options, analysis.id)) - - # Get the datatype and command ids from the strings - datatype_id = qdb.util.convert_to_id(datatype, "data_type") - sql = "SELECT command_id FROM qiita.command WHERE name = %s" - qdb.sql_connection.TRN.add(sql, [command]) - command_id = qdb.sql_connection.TRN.execute_fetchlast() - opts_json = qdb.util.params_dict_to_json(options) - - # Create the job and return it - sql = """INSERT INTO qiita.{0} (data_type_id, job_status_id, - command_id, options, - input_file_reference_id, - input_file_software_command_id) - VALUES (%s, %s, %s, %s, %s, %s) - RETURNING job_id""".format(cls._table) - rid = (input_file_reference.id - if input_file_reference is not None else None) - cid = (input_file_software_command.id - if input_file_software_command is not None else None) - qdb.sql_connection.TRN.add( - sql, [datatype_id, 1, command_id, opts_json, rid, cid]) - job_id = qdb.sql_connection.TRN.execute_fetchlast() - - # add job to analysis - qdb.sql_connection.TRN.add(analysis_sql, [analysis.id, job_id]) - qdb.sql_connection.TRN.execute() - - return cls(job_id) - - @property - def datatype(self): - with qdb.sql_connection.TRN: - sql = """SELECT data_type - FROM qiita.data_type - WHERE data_type_id = ( - SELECT data_type_id - FROM qiita.{0} - WHERE job_id = %s)""".format(self._table) - qdb.sql_connection.TRN.add(sql, [self._id]) - return qdb.sql_connection.TRN.execute_fetchlast() - - @property - def command(self): - """Returns the command of the job as (name, command) - - Returns - ------- - str - command run by the job - """ - with qdb.sql_connection.TRN: - sql = """SELECT name, command - FROM qiita.command - WHERE command_id = ( - SELECT command_id - FROM qiita.{0} - WHERE job_id = %s)""".format(self._table) - qdb.sql_connection.TRN.add(sql, [self._id]) - # We only want the first row (the only one present) - return qdb.sql_connection.TRN.execute_fetchindex()[0] - - @property - def options(self): - """Options used in the job - - Returns - ------- - dict - options in the format {option: setting} - """ - with qdb.sql_connection.TRN: - sql = """SELECT options FROM qiita.{0} - WHERE job_id = %s""".format(self._table) - qdb.sql_connection.TRN.add(sql, [self._id]) - db_opts = qdb.sql_connection.TRN.execute_fetchlast() - opts = loads(db_opts) if db_opts else {} - - sql = """SELECT command, output - FROM qiita.command - WHERE command_id = ( - SELECT command_id - FROM qiita.{0} - WHERE job_id = %s)""".format(self._table) - qdb.sql_connection.TRN.add(sql, [self._id]) - db_comm = qdb.sql_connection.TRN.execute_fetchindex()[0] - - out_opt = loads(db_comm[1]) - _, mp, _ = qdb.util.get_mountpoint('job', retrieve_subdir=True)[0] - join_f = partial(join, mp) - for k in out_opt: - opts[k] = join_f("%s_%s_%s" % (self._id, db_comm[0], - k.strip("-"))) - return opts - - @options.setter - def options(self, opts): - """ Sets the options for the job - - Parameters - ---------- - opts: dict - The options for the 
command in format {option: value} - """ - with qdb.sql_connection.TRN: - # make sure job is editable - self._lock_job() - - # JSON the options dictionary - opts_json = qdb.util.params_dict_to_json(opts) - # Add the options to the job - sql = """UPDATE qiita.{0} SET options = %s - WHERE job_id = %s""".format(self._table) - qdb.sql_connection.TRN.add(sql, [opts_json, self._id]) - qdb.sql_connection.TRN.execute() - - @property - def results(self): - """List of job result filepaths - - Returns - ------- - list - Filepaths to the result files - """ - # Select results filepaths and filepath types from the database - with qdb.sql_connection.TRN: - _, basedir = qdb.util.get_mountpoint('job')[0] - sql = """SELECT filepath, filepath_type - FROM qiita.filepath - JOIN qiita.filepath_type USING (filepath_type_id) - JOIN qiita.job_results_filepath USING (filepath_id) - WHERE job_id = %s""" - qdb.sql_connection.TRN.add(sql, [self._id]) - results = qdb.sql_connection.TRN.execute_fetchindex() - - def add_html(basedir, check_dir, result_fps): - for res in glob(join(basedir, check_dir, "*.htm")) + \ - glob(join(basedir, check_dir, "*.html")): - result_fps.append(relpath(res, basedir)) - - # create new list, with relative paths from db base - result_fps = [] - for fp in results: - if fp[1] == "directory": - # directory, so all html files in it are results - # first, see if we have any in the main directory - add_html(basedir, fp[0], result_fps) - # now do all subdirectories - add_html(basedir, join(fp[0], "*"), result_fps) - else: - # result is exact filepath given - result_fps.append(fp[0]) - return result_fps - - @property - def error(self): - """String with an error message, if the job failed - - Returns - ------- - str or None - error message/traceback for a job, or None if none exists - """ - with qdb.sql_connection.TRN: - sql = "SELECT log_id FROM qiita.{0} WHERE job_id = %s".format( - self._table) - qdb.sql_connection.TRN.add(sql, [self._id]) - logging_id = qdb.sql_connection.TRN.execute_fetchlast() - return (qdb.logger.LogEntry(logging_id) - if logging_id is not None else None) - -# --- Functions --- - def set_error(self, msg): - """Logs an error for the job - - Parameters - ---------- - msg : str - Error message/stacktrace if available - """ - with qdb.sql_connection.TRN: - log_entry = qdb.logger.LogEntry.create( - 'Runtime', msg, info={'job': self._id}) - self._lock_job() - - err_id = qdb.util.convert_to_id('error', 'job_status', 'status') - # attach the error to the job and set to error - sql = """UPDATE qiita.{0} SET log_id = %s, job_status_id = %s - WHERE job_id = %s""".format(self._table) - qdb.sql_connection.TRN.add(sql, [log_entry.id, err_id, self._id]) - qdb.sql_connection.TRN.execute() - - def add_results(self, results): - """Adds a list of results to the results - - Parameters - ---------- - results : list of tuples - filepath information to add to job, in format - [(filepath, type), ...] 
- Where type is the filepath type of the filepath passed - - Notes - ----- - Curently available file types are: - biom, directory, plain_text - """ - with qdb.sql_connection.TRN: - self._lock_job() - # convert all file type text to file type ids - res_ids = [(fp, qdb.util.convert_to_id(fptype, "filepath_type")) - for fp, fptype in results] - file_ids = qdb.util.insert_filepaths( - res_ids, self._id, self._table, "filepath", move_files=False) - - # associate filepaths with job - sql = """INSERT INTO qiita.{0}_results_filepath - (job_id, filepath_id) - VALUES (%s, %s)""".format(self._table) - qdb.sql_connection.TRN.add( - sql, [[self._id, fid] for fid in file_ids], many=True) - qdb.sql_connection.TRN.execute() - - -class Command(object): - """Holds all information on the commands available - - This will be an in-memory representation because the command table is - considerably more static than other objects tables, changing only with new - QIIME releases. - - Attributes - ---------- - name - command - input_opts - required_opts - optional_opts - output_opts - """ - @classmethod - def create_list(cls): - """Creates list of all available commands - - Returns - ------- - list of Command objects - """ - with qdb.sql_connection.TRN: - qdb.sql_connection.TRN.add("SELECT * FROM qiita.command") - commands = qdb.sql_connection.TRN.execute_fetchindex() - # create the list of command objects - return [cls(c["name"], c["command"], c["input"], c["required"], - c["optional"], c["output"]) for c in commands] - - @classmethod - def get_commands_by_datatype(cls, datatypes=None): - """Returns the commands available for all or a subset of the datatypes - - Parameters - ---------- - datatypes : list of str, optional - List of the datatypes to get commands for. Default is all datatypes - - Returns - ------- - dict of lists of Command objects - Returns commands in the format {datatype: [com name1, com name2]} - - Notes - ----- - If no datatypes are passed, the function will default to returning all - datatypes available. 
- """ - with qdb.sql_connection.TRN: - # get the ids of the datatypes to get commands for - if datatypes is not None: - datatype_info = [(qdb.util.convert_to_id(dt, "data_type"), dt) - for dt in datatypes] - else: - sql = "SELECT data_type_id, data_type from qiita.data_type" - qdb.sql_connection.TRN.add(sql) - datatype_info = qdb.sql_connection.TRN.execute_fetchindex() - - commands = defaultdict(list) - # get commands for each datatype - sql = """SELECT C.* - FROM qiita.command C - JOIN qiita.command_data_type USING (command_id) - WHERE data_type_id = %s""" - for dt_id, dt in datatype_info: - qdb.sql_connection.TRN.add(sql, [dt_id]) - comms = qdb.sql_connection.TRN.execute_fetchindex() - for comm in comms: - commands[dt].append(cls(comm["name"], comm["command"], - comm["input"], - comm["required"], - comm["optional"], - comm["output"])) - return commands - - def __eq__(self, other): - if type(self) != type(other): - return False - if self.name != other.name: - return False - if self.command != other.command: - return False - if self.input_opts != other.input_opts: - return False - if self.output_opts != other.output_opts: - return False - if self.required_opts != other.required_opts: - return False - if self.optional_opts != other.optional_opts: - return False - return True - - def __ne__(self, other): - return not self.__eq__(other) - - def __init__(self, name, command, input_opts, required_opts, - optional_opts, output_opts): - """Creates the command object - - Parameters: - name : str - Name of the command - command: str - python command to run - input_opts : str - JSON of input options for the command - required_opts : str - JSON of required options for the command - optional_opts : str - JSON of optional options for the command - output_opts : str - JSON of output options for the command - """ - self.name = name - self.command = command - self.input_opts = loads(input_opts) - self.required_opts = loads(required_opts) - self.optional_opts = loads(optional_opts) - self.output_opts = loads(output_opts) diff --git a/qiita_db/meta_util.py b/qiita_db/meta_util.py index 78bc8ee3f..1978ca5e4 100644 --- a/qiita_db/meta_util.py +++ b/qiita_db/meta_util.py @@ -97,15 +97,12 @@ def validate_filepath_access_by_user(user, filepath_id): (SELECT array_agg(prep_template_id) FROM qiita.prep_template_filepath WHERE filepath_id = {0}) AS prep_info, - (SELECT array_agg(job_id) - FROM qiita.job_results_filepath - WHERE filepath_id = {0}) AS job_results, (SELECT array_agg(analysis_id) FROM qiita.analysis_filepath WHERE filepath_id = {0}) AS analysis""".format(filepath_id) TRN.add(sql) - arid, sid, pid, jid, anid = TRN.execute_fetchflatten() + arid, sid, pid, anid = TRN.execute_fetchflatten() # artifacts if arid: @@ -114,8 +111,14 @@ def validate_filepath_access_by_user(user, filepath_id): if artifact.visibility == 'public': return True else: - # let's take the visibility via the Study - return artifact.study.has_access(user) + study = artifact.study + if study: + # let's take the visibility via the Study + return artifact.study.has_access(user) + else: + analysis = artifact.analysis + return analysis in ( + user.private_analyses | user.shared_analyses) # sample info files elif sid: # the visibility of the sample info file is given by the @@ -144,22 +147,13 @@ def validate_filepath_access_by_user(user, filepath_id): return True return False # analyses - elif anid or jid: - if jid: - # [0] cause we should only have 1 - sql = """SELECT analysis_id FROM qiita.analysis_job - WHERE job_id = {0}""".format(jid[0]) - 
TRN.add(sql) - aid = TRN.execute_fetchlast() - else: - aid = anid[0] + elif anid: # [0] cause we should only have 1 + aid = anid[0] analysis = qdb.analysis.Analysis(aid) - if analysis.status == 'public': - return True - else: - return analysis in ( - user.private_analyses | user.shared_analyses) + return analysis in ( + user.private_analyses | user.shared_analyses) + return False def update_redis_stats(): diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index a4de41ad3..4bd340dfb 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -557,10 +557,7 @@ def _common_creation_steps(cls, md_template, obj_id): table_name = cls._table_name(obj_id) column_datatype = ["%s varchar" % col for col in headers] sql = """CREATE TABLE qiita.{0} ( - sample_id varchar NOT NULL, {1}, - CONSTRAINT fk_{0} FOREIGN KEY (sample_id) - REFERENCES qiita.study_sample (sample_id) - ON UPDATE CASCADE + sample_id varchar NOT NULL, {1} )""".format(table_name, ', '.join(column_datatype)) qdb.sql_connection.TRN.add(sql) diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py index 8e39d2ee6..82d72a79a 100644 --- a/qiita_db/metadata_template/test/test_prep_template.py +++ b/qiita_db/metadata_template/test/test_prep_template.py @@ -876,8 +876,6 @@ def _common_creation_checks(self, pt, fp_count): # prep and qiime files have been created filepaths = pt.get_filepaths() self.assertEqual(len(filepaths), 2) - self.assertEqual(filepaths[0][0], fp_count + 2) - self.assertEqual(filepaths[1][0], fp_count + 1) def test_create(self): """Creates a new PrepTemplate""" @@ -932,7 +930,6 @@ def test_create_data_type_id(self): def test_create_warning(self): """Warns if a required columns is missing for a given functionality """ - fp_count = qdb.util.get_count("qiita.filepath") del self.metadata['barcode'] pt = npt.assert_warns( qdb.exceptions.QiitaDBWarning, @@ -1000,8 +997,6 @@ def test_create_warning(self): # prep and qiime files have been created filepaths = pt.get_filepaths() self.assertEqual(len(filepaths), 2) - self.assertEqual(filepaths[0][0], fp_count + 2) - self.assertEqual(filepaths[1][0], fp_count + 1) def test_create_investigation_type_error(self): """Create raises an error if the investigation_type does not exists""" diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index bca13f35d..c9addf20e 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -1320,7 +1320,7 @@ def test_get_filepath(self): # change based on time and the same functionality is being tested # in data.py exp_id = self.conn_handler.execute_fetchone( - "SELECT count(1) FROM qiita.filepath")[0] + 1 + "SELECT last_value FROM qiita.filepath_filepath_id_seq")[0] + 1 st = qdb.metadata_template.sample_template.SampleTemplate.create( self.metadata, self.new_study) self.assertEqual(st.get_filepaths()[0][0], exp_id) diff --git a/qiita_db/portal.py b/qiita_db/portal.py index 659a50bac..8ddebad65 100644 --- a/qiita_db/portal.py +++ b/qiita_db/portal.py @@ -91,14 +91,10 @@ def create(cls, portal, desc): SELECT email FROM qiita.qiita_user LOOP INSERT INTO qiita.analysis - (email, name, description, dflt, - analysis_status_id) - VALUES (eml, eml || '-dflt', 'dflt', true, 1) + (email, name, description, dflt) + VALUES 
(eml, eml || '-dflt', 'dflt', true) RETURNING analysis_id INTO aid; - INSERT INTO qiita.analysis_workflow (analysis_id, step) - VALUES (aid, 2); - INSERT INTO qiita.analysis_portal (analysis_id, portal_type_id) VALUES (aid, pid); @@ -162,9 +158,6 @@ def delete(portal): DELETE FROM qiita.analysis_portal WHERE analysis_id = aid; - DELETE FROM qiita.analysis_workflow - WHERE analysis_id = aid; - DELETE FROM qiita.analysis_sample WHERE analysis_id = aid; diff --git a/qiita_db/private.py b/qiita_db/private.py new file mode 100644 index 000000000..78a286f51 --- /dev/null +++ b/qiita_db/private.py @@ -0,0 +1,74 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- + +from json import dumps +from sys import exc_info +from time import sleep +import traceback + +import qiita_db as qdb + + +def build_analysis_files(job): + """Builds the files for an analysis + + Parameters + ---------- + job : qiita_db.processing_job.ProcessingJob + The processing job with the information for building the files + """ + with qdb.sql_connection.TRN: + params = job.parameters.values + analysis_id = params['analysis'] + merge_duplicated_sample_ids = params['merge_dup_sample_ids'] + analysis = qdb.analysis.Analysis(analysis_id) + biom_files = analysis.build_files(merge_duplicated_sample_ids) + + cmd = qdb.software.Command.get_validator('BIOM') + val_jobs = [] + for dtype, biom_fp in biom_files: + validate_params = qdb.software.Parameters.load( + cmd, values_dict={'files': dumps({'biom': [biom_fp]}), + 'artifact_type': 'BIOM', + 'provenance': dumps({'job': job.id, + 'data_type': dtype}), + 'analysis': analysis_id}) + val_jobs.append(qdb.processing_job.ProcessingJob.create( + analysis.owner, validate_params)) + + job._set_validator_jobs(val_jobs) + + for j in val_jobs: + j.submit() + sleep(1) + + +TASK_DICT = {'build_analysis_files': build_analysis_files} + + +def private_task(job_id): + """Complets a Qiita private task + + Parameters + ---------- + job_id : str + The job id + """ + if job_id == 'register': + # We don't need to do anything here if Qiita is registering plugins + return + + job = qdb.processing_job.ProcessingJob(job_id) + job.update_heartbeat_state() + task_name = job.command.name + + try: + TASK_DICT[task_name](job) + except Exception: + job.complete(False, error="Error executing private task: %s" + % traceback.format_exception(*exc_info())) diff --git a/qiita_db/processing_job.py b/qiita_db/processing_job.py index 4e99d3b5b..66e176feb 100644 --- a/qiita_db/processing_job.py +++ b/qiita_db/processing_job.py @@ -69,6 +69,30 @@ def _job_submitter(job, cmd): job.complete(False, error=error) +def private_job_submitter(job_name, command, args): + """Submits a private job + + Parameters + ---------- + job_name : str + The name of the job + command: str + The private command to be executed + args: list of str + The arguments to the private command + """ + + cmd = "%s '%s' %s %s" % (qiita_config.private_launcher, + qiita_config.qiita_env, command, + ' '.join("'%s'" % a for a in args)) + std_out, std_err, return_value = _system_call(cmd) + error = "" + if return_value != 0: + error = ("Can't submit private task '%s':\n" + "Std output:%s\nStd error: %s" % (command, std_out, std_err)) + return (return_value == 0), 
error + + class ProcessingJob(qdb.base.QiitaObject): r"""Models a job that executes a command in a set of artifacts @@ -352,30 +376,49 @@ def release(self): qdb.sql_connection.TRN.add(sql, [self.id]) a_info = qdb.sql_connection.TRN.execute_fetchlast() - atype = a_info['artifact_type'] - filepaths = a_info['filepaths'] provenance = loads(self.parameters.values['provenance']) job = ProcessingJob(provenance['job']) - parents = job.input_artifacts - params = job.parameters + if 'data_type' in a_info: + # This job is resulting from a private job + parents = None + params = None + cmd_out_id = None + data_type = a_info['data_type'] + analysis = qdb.analysis.Analysis( + job.parameters.values['analysis']) + a_info = a_info['artifact_data'] + else: + # This job is resulting from a plugin job + parents = job.input_artifacts + params = job.parameters + cmd_out_id = provenance['cmd_out_id'] + analysis = None + data_type = None # Create the artifact + atype = a_info['artifact_type'] + filepaths = a_info['filepaths'] a = qdb.artifact.Artifact.create( filepaths, atype, parents=parents, - processing_parameters=params) + processing_parameters=params, + analysis=analysis, data_type=data_type) - cmd_out_id = provenance['cmd_out_id'] - mapping = {cmd_out_id: a.id} self._set_status('success') + mapping = {} + if cmd_out_id is not None: + mapping = {cmd_out_id: a.id} + return mapping def release_validators(self): """Allows all the validator job spawned by this job to complete""" with qdb.sql_connection.TRN: - if self.command.software.type != 'artifact transformation': + if self.command.software.type not in ('artifact transformation', + 'private'): raise qdb.exceptions.QiitaDBOperationNotPermittedError( - "Only artifact transformation jobs can release validators") + "Only artifact transformation and private jobs can " + "release validators") # Check if all the validators are ready by checking that there is # no validator processing job whose status is not waiting @@ -405,16 +448,17 @@ def release_validators(self): vjob = ProcessingJob(jid) mapping.update(vjob.release()) - sql = """INSERT INTO - qiita.artifact_output_processing_job - (artifact_id, processing_job_id, - command_output_id) - VALUES (%s, %s, %s)""" - sql_args = [[aid, self.id, outid] - for outid, aid in viewitems(mapping)] - qdb.sql_connection.TRN.add(sql, sql_args, many=True) - - self._update_and_launch_children(mapping) + if mapping: + sql = """INSERT INTO + qiita.artifact_output_processing_job + (artifact_id, processing_job_id, + command_output_id) + VALUES (%s, %s, %s)""" + sql_args = [[aid, self.id, outid] + for outid, aid in viewitems(mapping)] + qdb.sql_connection.TRN.add(sql, sql_args, many=True) + + self._update_and_launch_children(mapping) self._set_status('success') else: self.step = "Validating outputs (%d remaining)" % remaining @@ -444,6 +488,9 @@ def _complete_artifact_definition(self, artifact_data): # The artifact is a result from a previous job provenance = loads(job_params['provenance']) job = ProcessingJob(provenance['job']) + if provenance.get('data_type') is not None: + artifact_data = {'data_type': provenance['data_type'], + 'artifact_data': artifact_data} sql = """UPDATE qiita.processing_job_validator SET artifact_info = %s @@ -455,11 +502,27 @@ def _complete_artifact_definition(self, artifact_data): self._set_status('waiting') job.release_validators() else: - # The artifact is uploaded by the user - pt = qdb.metadata_template.prep_template.PrepTemplate( - job_params['template']) + # The artifact is uploaded by the user or is 
the initial
+                # artifact of an analysis
+                if ('analysis' in job_params and
+                        job_params['analysis'] is not None):
+                    pt = None
+                    an = qdb.analysis.Analysis(job_params['analysis'])
+                    sql = """SELECT data_type
+                             FROM qiita.analysis_processing_job
+                             WHERE analysis_id = %s
+                                AND processing_job_id = %s"""
+                    qdb.sql_connection.TRN.add(sql, [an.id, self.id])
+                    data_type = qdb.sql_connection.TRN.execute_fetchlast()
+                else:
+                    pt = qdb.metadata_template.prep_template.PrepTemplate(
+                        job_params['template'])
+                    an = None
+                    data_type = None
+
                 qdb.artifact.Artifact.create(
-                    filepaths, atype, prep_template=pt)
+                    filepaths, atype, prep_template=pt, analysis=an,
+                    data_type=data_type)
                 self._set_status('success')
 
     def _complete_artifact_transformation(self, artifacts_data):
@@ -505,11 +568,21 @@ def _complete_artifact_transformation(self, artifacts_data):
             templates = set()
             for artifact in self.input_artifacts:
                 templates.update(pt.id for pt in artifact.prep_templates)
+            template = None
+            analysis = None
             if len(templates) > 1:
                 raise qdb.exceptions.QiitaDBError(
                     "Currently only single prep template "
                     "is allowed, found %d" % len(templates))
-            template = templates.pop()
+            elif len(templates) == 1:
+                template = templates.pop()
+            else:
+                # In this case we have 0 templates, which means that this
+                # artifact is being generated in the analysis pipeline.
+                # All the artifacts included in the analysis pipeline
+                # belong to the same analysis, so we can just ask the
+                # first artifact for the analysis it belongs to.
+                analysis = self.input_artifacts[0].analysis.id
 
             # Once the validate job completes, it needs to know if it has
             # been generated from a command (and how) or if it has been
@@ -530,6 +603,7 @@ def _complete_artifact_transformation(self, artifacts_data):
                 cmd, values_dict={'files': dumps(filepaths),
                                   'artifact_type': atype,
                                   'template': template,
+                                  'analysis': analysis,
                                   'provenance': dumps(provenance)})
             validator_jobs.append(
                 ProcessingJob.create(self.user, validate_params))
@@ -1134,7 +1208,16 @@ def _raise_if_not_in_construction(self):
                      WHERE processing_job_workflow_id = %s"""
             qdb.sql_connection.TRN.add(sql, [self.id])
             res = qdb.sql_connection.TRN.execute_fetchflatten()
-            if len(res) != 1 or res[0] != 'in_construction':
+            # If the query above returns a single element and its value is
+            # different from 'in_construction', it means that all the jobs
+            # in the workflow share the same status and that status is not
+            # 'in_construction', hence raise the error. If the query returns
+            # more than one value (len(res) > 1), the workflow is no longer
+            # in construction because some jobs have already been submitted
+            # for processing. Note that if the query doesn't return any
+            # value, no jobs are attached to the workflow yet, which means
+            # that the workflow is still in construction.
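+            # A minimal sketch of this rule, assuming `statuses` holds the
+            # distinct job statuses fetched above (the helper name is
+            # illustrative only, it is not defined in this module):
+            #
+            #     def _workflow_editable(statuses):
+            #         # editable when there are no jobs yet, or when every
+            #         # job still shares the 'in_construction' status
+            #         return statuses == [] or statuses == ['in_construction']
+            #
+            # The check below raises in exactly the complementary cases.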
+ if (len(res) == 1 and res[0] != 'in_construction') or len(res) > 1: # The workflow is no longer in construction, raise an error raise qdb.exceptions.QiitaDBOperationNotPermittedError( "Workflow not in construction") diff --git a/qiita_db/software.py b/qiita_db/software.py index 7335e6100..ce147afcb 100644 --- a/qiita_db/software.py +++ b/qiita_db/software.py @@ -44,7 +44,8 @@ class Command(qdb.base.QiitaObject): _table = "software_command" @classmethod - def get_commands_by_input_type(cls, artifact_types, active_only=True): + def get_commands_by_input_type(cls, artifact_types, active_only=True, + exclude_analysis=True): """Returns the commands that can process the given artifact types Parameters @@ -54,6 +55,8 @@ def get_commands_by_input_type(cls, artifact_types, active_only=True): active_only : bool, optional If True, return only active commands, otherwise return all commands Default: True + exclude_analysis : bool, optional + If True, return commands that are not part of the analysis pipeline Returns ------- @@ -70,6 +73,8 @@ def get_commands_by_input_type(cls, artifact_types, active_only=True): WHERE artifact_type IN %s""" if active_only: sql += " AND active = True" + if exclude_analysis: + sql += " AND is_analysis = False" qdb.sql_connection.TRN.add(sql, [tuple(artifact_types)]) for c_id in qdb.sql_connection.TRN.execute_fetchflatten(): yield cls(c_id) @@ -191,7 +196,8 @@ def exists(cls, software, name): return qdb.sql_connection.TRN.execute_fetchlast() @classmethod - def create(cls, software, name, description, parameters, outputs=None): + def create(cls, software, name, description, parameters, outputs=None, + analysis_only=False): r"""Creates a new command in the system The supported types for the parameters are: @@ -222,6 +228,9 @@ def create(cls, software, name, description, parameters, outputs=None): outputs : dict, optional The description of the outputs that this command generated. The format is: {output_name: artifact_type} + analysis_only : bool, optional + If true, then the command will only be available on the analysis + pipeline. Default: False. Returns ------- @@ -263,18 +272,27 @@ def create(cls, software, name, description, parameters, outputs=None): ptype, dflt = vals # Check that the type is one of the supported types supported_types = ['string', 'integer', 'float', 'reference', - 'boolean', 'prep_template'] + 'boolean', 'prep_template', 'analysis'] if ptype not in supported_types and not ptype.startswith( - ('choice', 'artifact')): - supported_types.extend(['choice', 'artifact']) + ('choice', 'mchoice', 'artifact')): + supported_types.extend(['choice', 'mchoice', 'artifact']) raise qdb.exceptions.QiitaDBError( "Unsupported parameters type '%s' for parameter %s. 
" "Supported types are: %s" % (ptype, pname, ', '.join(supported_types))) - if ptype.startswith('choice') and dflt is not None: - choices = loads(ptype.split(':')[1]) - if dflt not in choices: + if ptype.startswith(('choice', 'mchoice')) and dflt is not None: + choices = set(loads(ptype.split(':')[1])) + dflt_val = dflt + if ptype.startswith('choice'): + # In the choice case, the dflt value is a single string, + # create a list with it the string on it to use the + # issuperset call below + dflt_val = [dflt_val] + else: + # jsonize the list to store it in the DB + dflt = dumps(dflt) + if not choices.issuperset(dflt_val): raise qdb.exceptions.QiitaDBError( "The default value '%s' for the parameter %s is not " "listed in the available choices: %s" @@ -297,10 +315,10 @@ def create(cls, software, name, description, parameters, outputs=None): % (software.id, name)) # Add the command to the DB sql = """INSERT INTO qiita.software_command - (name, software_id, description) - VALUES (%s, %s, %s) + (name, software_id, description, is_analysis) + VALUES (%s, %s, %s, %s) RETURNING command_id""" - sql_params = [name, software.id, description] + sql_params = [name, software.id, description, analysis_only] qdb.sql_connection.TRN.add(sql, sql_params) c_id = qdb.sql_connection.TRN.execute_fetchlast() @@ -319,6 +337,7 @@ def create(cls, software, name, description, parameters, outputs=None): sql_type = """INSERT INTO qiita.parameter_artifact_type (command_parameter_id, artifact_type_id) VALUES (%s, %s)""" + supported_types = [] for pname, p_type, atypes in sql_artifact_params: sql_params = [c_id, pname, p_type, True, None] qdb.sql_connection.TRN.add(sql, sql_params) @@ -327,6 +346,30 @@ def create(cls, software, name, description, parameters, outputs=None): [pid, qdb.util.convert_to_id(at, 'artifact_type')] for at in atypes] qdb.sql_connection.TRN.add(sql_type, sql_params, many=True) + supported_types.extend([atid for _, atid in sql_params]) + + # If the software type is 'artifact definition', there are a couple + # of extra steps + if software.type == 'artifact definition': + # If supported types is not empty, link the software with these + # types + if supported_types: + sql = """INSERT INTO qiita.software_artifact_type + (software_id, artifact_type_id) + VALUES (%s, %s)""" + sql_params = [[software.id, atid] + for atid in supported_types] + qdb.sql_connection.TRN.add(sql, sql_params, many=True) + # If this is the validate command, we need to add the + # provenance parameter. This is used internally, that's why + # we are adding it here + if name == 'Validate': + sql = """INSERT INTO qiita.command_parameter + (command_id, parameter_name, parameter_type, + required, default_value) + VALUES (%s, 'provenance', 'string', 'False', NULL) + """ + qdb.sql_connection.TRN.add(sql, [c_id]) # Add the outputs to the command if outputs: @@ -445,7 +488,17 @@ def optional_parameters(self): WHERE command_id = %s AND required = false""" qdb.sql_connection.TRN.add(sql, [self.id]) res = qdb.sql_connection.TRN.execute_fetchindex() - return {pname: [ptype, dflt] for pname, ptype, dflt in res} + + # Define a function to load the json storing the default parameters + # if ptype is multiple choice. 
When I added it to the for loop as + # a one liner if, made the code a bit hard to read + def dflt_fmt(dflt, ptype): + if ptype.startswith('mchoice'): + return loads(dflt) + return dflt + + return {pname: [ptype, dflt_fmt(dflt, ptype)] + for pname, ptype, dflt in res} @property def default_parameter_sets(self): @@ -508,6 +561,22 @@ def activate(self): qdb.sql_connection.TRN.add(sql, [True, self.id]) return qdb.sql_connection.TRN.execute() + @property + def analysis_only(self): + """Returns if the command is an analysis-only command + + Returns + ------- + bool + Whether the command is analysis only or not + """ + with qdb.sql_connection.TRN: + sql = """SELECT is_analysis + FROM qiita.software_command + WHERE command_id = %s""" + qdb.sql_connection.TRN.add(sql, [self.id]) + return qdb.sql_connection.TRN.execute_fetchlast() + class Software(qdb.base.QiitaObject): r"""A software package available in the system diff --git a/qiita_db/support_files/patches/54.sql b/qiita_db/support_files/patches/54.sql new file mode 100644 index 000000000..c982566ad --- /dev/null +++ b/qiita_db/support_files/patches/54.sql @@ -0,0 +1,123 @@ +-- Jan 5, 2017 +-- Move the analysis to the plugin system. This is a major rewrite of the +-- database backend that supports the analysis pipeline. +-- After exploring the data on the database, we realized that +-- there are a lot of inconsistencies in the data. Unfortunately, this +-- makes the process of transferring the data from the old structure +-- to the new one a bit more challenging, as we will need to handle +-- different special cases. Furthermore, all the information needed is not +-- present in the database, since it requires checking BIOM files. Due to these +-- reason, the vast majority of the data transfer is done in the python patch +-- 51.py + +-- In this file we are just creating the new data structures. The old +-- datastructure will be dropped in the python patch once all data has been +-- transferred. + +-- Create the new data structures + +-- Table that links the analysis with the initial set of artifacts +CREATE TABLE qiita.analysis_artifact ( + analysis_id bigint NOT NULL, + artifact_id bigint NOT NULL, + CONSTRAINT idx_analysis_artifact_0 PRIMARY KEY (analysis_id, artifact_id) +); +CREATE INDEX idx_analysis_artifact_analysis ON qiita.analysis_artifact (analysis_id); +CREATE INDEX idx_analysis_artifact_artifact ON qiita.analysis_artifact (artifact_id); +ALTER TABLE qiita.analysis_artifact ADD CONSTRAINT fk_analysis_artifact_analysis FOREIGN KEY ( analysis_id ) REFERENCES qiita.analysis( analysis_id ); +ALTER TABLE qiita.analysis_artifact ADD CONSTRAINT fk_analysis_artifact_artifact FOREIGN KEY ( artifact_id ) REFERENCES qiita.artifact( artifact_id ); + +-- Droping the analysis status column cause now it depends on the artifacts +-- status, like the study does. 
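For reference, a minimal usage sketch of the qiita_db/software.py changes above; the Software instance, command name and parameter values are illustrative only and are not created by this patch:

    from qiita_db.software import Command, Software

    # Assumed: software id 1 points at an existing plugin; the id and the
    # command below are hypothetical examples, not part of this patch
    qiime_sw = Software(1)

    cmd = Command.create(
        qiime_sw, 'Example analysis step', 'Hypothetical analysis-only command',
        parameters={'biom_table': ('artifact:["BIOM"]', None),
                    'metrics': ('mchoice:["chao1","observed_otus","shannon"]',
                                ['chao1', 'observed_otus'])},
        analysis_only=True)

    # 'mchoice' defaults are JSON-encoded for storage and decoded on read
    ptype, dflt = cmd.optional_parameters['metrics']
    assert dflt == ['chao1', 'observed_otus']

    # Analysis-only commands are excluded by default when looking up the
    # commands that can process a given artifact type
    assert cmd.analysis_only
    assert cmd not in Command.get_commands_by_input_type(['BIOM'],
                                                         active_only=False)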
+ALTER TABLE qiita.analysis DROP COLUMN analysis_status_id;
+
+-- Create a table to link the analysis with the jobs that create the initial
+-- artifacts
+CREATE TABLE qiita.analysis_processing_job (
+    analysis_id          bigint  NOT NULL,
+    processing_job_id    uuid    NOT NULL,
+    CONSTRAINT idx_analysis_processing_job PRIMARY KEY ( analysis_id, processing_job_id )
+ ) ;
+
+CREATE INDEX idx_analysis_processing_job_analysis ON qiita.analysis_processing_job ( analysis_id ) ;
+CREATE INDEX idx_analysis_processing_job_pj ON qiita.analysis_processing_job ( processing_job_id ) ;
+ALTER TABLE qiita.analysis_processing_job ADD CONSTRAINT fk_analysis_processing_job FOREIGN KEY ( analysis_id ) REFERENCES qiita.analysis( analysis_id ) ;
+ALTER TABLE qiita.analysis_processing_job ADD CONSTRAINT fk_analysis_processing_job_pj FOREIGN KEY ( processing_job_id ) REFERENCES qiita.processing_job( processing_job_id ) ;
+
+-- Add a logging column in the analysis
+ALTER TABLE qiita.analysis ADD logging_id bigint ;
+CREATE INDEX idx_analysis_0 ON qiita.analysis ( logging_id ) ;
+ALTER TABLE qiita.analysis ADD CONSTRAINT fk_analysis_logging FOREIGN KEY ( logging_id ) REFERENCES qiita.logging( logging_id ) ;
+
+-- Alter the software command table to differentiate between commands that
+-- apply to the analysis pipeline and commands that apply to the study
+-- processing pipeline
+ALTER TABLE qiita.software_command ADD is_analysis bool DEFAULT 'False' NOT NULL;
+
+-- We can handle some of the special cases here, so we simplify the work in the
+-- python patch
+
+-- Special case 1: there are jobs in the database that do not contain
+-- any information about the options used to process those parameters.
+-- However, these jobs do not have any results and all are marked either
+-- as queued or error, although no error log has been saved. Since these
+-- jobs are mainly useless, we are going to remove them from the system
+DELETE FROM qiita.analysis_job
+    WHERE job_id IN (SELECT job_id FROM qiita.job WHERE options = '{}');
+DELETE FROM qiita.job WHERE options = '{}';
+
+-- Special case 2: there are a fair number of jobs (719 last time I
+-- checked) that are not attached to any analysis. Not sure how this
+-- can happen, but these orphan jobs can't be accessed from anywhere
+-- in the interface. Remove them from the system. Note that we are
+-- unlinking the files but we are not removing them from the filepath
+-- table. We will do that in the python patch 54.py using the
+-- purge_filepaths function, as it will make sure that those files are
+-- not used anywhere else
+DELETE FROM qiita.job_results_filepath WHERE job_id IN (
+    SELECT job_id FROM qiita.job J WHERE NOT EXISTS (
+        SELECT * FROM qiita.analysis_job AJ WHERE J.job_id = AJ.job_id));
+DELETE FROM qiita.job J WHERE NOT EXISTS (
+    SELECT * FROM qiita.analysis_job AJ WHERE J.job_id = AJ.job_id);
+
+-- In the analysis pipeline, an artifact can have multiple datatypes
+-- (e.g. procrustes). Allow this by creating a new data_type, "Multiomic"
+INSERT INTO qiita.data_type (data_type) VALUES ('Multiomic');
+
+
+-- The Validate command from BIOM will have an extra parameter, analysis
+-- Magic number 4 -> BIOM command_id -> known for sure since it was added in
+-- patch 36.sql
+INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required)
+    VALUES (4, 'analysis', 'analysis', FALSE);
+-- The template command now becomes optional, since it can be added either to
+-- an analysis or to a prep template.
command_parameter_id known from patch +-- 36.sql +UPDATE qiita.command_parameter SET required = FALSE WHERE command_parameter_id = 34; + +-- We are going to add a new special software type, and a new software. +-- This is going to be used internally by Qiita, so submit the private jobs. +-- This is needed for the analysis. +INSERT INTO qiita.software_type (software_type, description) + VALUES ('private', 'Internal Qiita jobs'); + +DO $do$ +DECLARE + qiita_sw_id bigint; + baf_cmd_id bigint; +BEGIN + INSERT INTO qiita.software (name, version, description, environment_script, start_script, software_type_id, active) + VALUES ('Qiita', 'alpha', 'Internal Qiita jobs', 'source activate qiita', 'qiita-private-plugin', 3, True) + RETURNING software_id INTO qiita_sw_id; + + INSERT INTO qiita.software_command (software_id, name, description) + VALUES (qiita_sw_id, 'build_analysis_files', 'Builds the files needed for the analysis') + RETURNING command_id INTO baf_cmd_id; + + INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required, default_value) + VALUES (baf_cmd_id, 'analysis', 'analysis', True, NULL), + (baf_cmd_id, 'merge_dup_sample_ids', 'bool', False, 'False'); +END $do$; + +-- Add a new filepath type +INSERT INTO qiita.filepath_type (filepath_type) VALUES ('html_summary_dir'), ('qzv'); diff --git a/qiita_db/support_files/patches/55.sql b/qiita_db/support_files/patches/55.sql new file mode 100644 index 000000000..65bafd459 --- /dev/null +++ b/qiita_db/support_files/patches/55.sql @@ -0,0 +1,4 @@ +-- Jul 6, 2017 +-- DELETE all sample/prep CONSTRAINTs + +SELECT 42; diff --git a/qiita_db/support_files/patches/python_patches/54.py b/qiita_db/support_files/patches/python_patches/54.py new file mode 100644 index 000000000..1dc576004 --- /dev/null +++ b/qiita_db/support_files/patches/python_patches/54.py @@ -0,0 +1,691 @@ +# The code is commented with details on the changes implemented here, +# but here is an overview of the changes needed to transfer the analysis +# data to the plugins structure: +# 1) Create a new type plugin to define the diversity types +# 2) Create the new commands on the existing QIIME plugin to execute the +# existing analyses (beta div, taxa summaries and alpha rarefaction) +# 3) Transfer all the data in the old structures to the plugin structures +# 4) Delete old structures + +from string import ascii_letters, digits +from random import SystemRandom +from os.path import join, exists, basename +from os import makedirs +from json import loads + +from biom import load_table, Table +from biom.util import biom_open + +from qiita_db.sql_connection import TRN +from qiita_db.util import (get_db_files_base_dir, purge_filepaths, + get_mountpoint, compute_checksum) +from qiita_db.artifact import Artifact + +# Create some aux functions that are going to make the code more modular +# and easier to understand, since there is a fair amount of work to do to +# trasnfer the data from the old structure to the new one + + +def get_random_string(length): + """Creates a random string of the given length with alphanumeric chars + + Parameters + ---------- + length : int + The desired length of the string + + Returns + ------- + str + The new random string + """ + sr = SystemRandom() + chars = ascii_letters + digits + return ''.join(sr.choice(chars) for i in range(length)) + + +def create_non_rarefied_biom_artifact(analysis, biom_data, rarefied_table): + """Creates the initial non-rarefied BIOM artifact of the analysis + + Parameters + ---------- + analysis : dict + 
Dictionary with the analysis information + biom_data : dict + Dictionary with the biom file information + rarefied_table : biom.Table + The rarefied BIOM table + + Returns + ------- + int + The id of the new artifact + """ + # The non rarefied biom artifact is the initial biom table of the analysis. + # This table does not currently exist anywhere, so we need to actually + # create the BIOM file. To create this BIOM file we need: (1) the samples + # and artifacts they come from and (2) whether the samples where + # renamed or not. (1) is on the database, but we need to inferr (2) from + # the existing rarefied BIOM table. Fun, fun... + + with TRN: + # Get the samples included in the BIOM table grouped by artifact id + # Note that the analysis contains a BIOM table per data type included + # in it, and the table analysis_sample does not differentiate between + # datatypes, so we need to check the data type in the artifact table + sql = """SELECT artifact_id, array_agg(sample_id) + FROM qiita.analysis_sample + JOIN qiita.artifact USING (artifact_id) + WHERE analysis_id = %s AND data_type_id = %s + GROUP BY artifact_id""" + TRN.add(sql, [analysis['analysis_id'], biom_data['data_type_id']]) + samples_by_artifact = TRN.execute_fetchindex() + + # Create an empty BIOM table to be the new master table + new_table = Table([], [], []) + ids_map = {} + for a_id, samples in samples_by_artifact: + # Get the filepath of the BIOM table from the artifact + artifact = Artifact(a_id) + biom_fp = None + for _, fp, fp_type in artifact.filepaths: + if fp_type == 'biom': + biom_fp = fp + # Note that we are sure that the biom table exists for sure, so + # no need to check if biom_fp is undefined + biom_table = load_table(biom_fp) + samples = set(samples).intersection(biom_table.ids()) + biom_table.filter(samples, axis='sample', inplace=True) + # we need to check if the table has samples left before merging + if biom_table.shape[0] != 0 and biom_table.shape[1] != 0: + new_table = new_table.merge(biom_table) + ids_map.update({sid: "%d.%s" % (a_id, sid) + for sid in biom_table.ids()}) + + # Check if we need to rename the sample ids in the biom table + new_table_ids = set(new_table.ids()) + if not new_table_ids.issuperset(rarefied_table.ids()): + # We need to rename the sample ids + new_table.update_ids(ids_map, 'sample', True, True) + + sql = """INSERT INTO qiita.artifact + (generated_timestamp, data_type_id, visibility_id, + artifact_type_id, submitted_to_vamps) + VALUES (%s, %s, %s, %s, %s) + RETURNING artifact_id""" + # Magic number 4 -> visibility sandbox + # Magix number 7 -> biom artifact type + TRN.add(sql, [analysis['timestamp'], biom_data['data_type_id'], + 4, 7, False]) + artifact_id = TRN.execute_fetchlast() + + # Associate the artifact with the analysis + sql = """INSERT INTO qiita.analysis_artifact + (analysis_id, artifact_id) + VALUES (%s, %s)""" + TRN.add(sql, [analysis['analysis_id'], artifact_id]) + # Link the artifact with its file + dd_id, mp = get_mountpoint('BIOM')[0] + dir_fp = join(get_db_files_base_dir(), mp, str(artifact_id)) + if not exists(dir_fp): + makedirs(dir_fp) + new_table_fp = join(dir_fp, "biom_table.biom") + with biom_open(new_table_fp, 'w') as f: + new_table.to_hdf5(f, "Generated by Qiita") + + sql = """INSERT INTO qiita.filepath + (filepath, filepath_type_id, checksum, + checksum_algorithm_id, data_directory_id) + VALUES (%s, %s, %s, %s, %s) + RETURNING filepath_id""" + # Magic number 7 -> filepath_type_id = 'biom' + # Magic number 1 -> the checksum algorithm id + TRN.add(sql, 
[basename(new_table_fp), 7, + compute_checksum(new_table_fp), 1, dd_id]) + fp_id = TRN.execute_fetchlast() + sql = """INSERT INTO qiita.artifact_filepath + (artifact_id, filepath_id) + VALUES (%s, %s)""" + TRN.add(sql, [artifact_id, fp_id]) + TRN.execute() + + return artifact_id + + +def create_rarefaction_job(depth, biom_artifact_id, analysis, srare_cmd_id): + """Create a new rarefaction job + + Parameters + ---------- + depth : int + The rarefaction depth + biom_artifact_id : int + The artifact id of the input rarefaction biom table + analysis : dict + Dictionary with the analysis information + srare_cmd_id : int + The command id of the single rarefaction command + + Returns + ------- + job_id : str + The job id + params : str + The job parameters + """ + # Add the row in the procesisng job table + params = ('{"depth":%d,"subsample_multinomial":false,"biom_table":%s}' + % (depth, biom_artifact_id)) + with TRN: + # magic number 3: status -> success + sql = """INSERT INTO qiita.processing_job + (email, command_id, command_parameters, + processing_job_status_id) + VALUES (%s, %s, %s, %s) + RETURNING processing_job_id""" + TRN.add(sql, [analysis['email'], srare_cmd_id, params, 3]) + job_id = TRN.execute_fetchlast() + # Step 1.2.b: Link the job with the input artifact + sql = """INSERT INTO qiita.artifact_processing_job + (artifact_id, processing_job_id) + VALUES (%s, %s)""" + TRN.add(sql, [biom_artifact_id, job_id]) + TRN.execute() + return job_id, params + + +def transfer_file_to_artifact(analysis_id, a_timestamp, command_id, + data_type_id, params, artifact_type_id, + filepath_id): + """Creates a new artifact with the given filepath id + + Parameters + ---------- + analysis_id : int + The analysis id to attach the artifact + a_timestamp : datetime.datetime + The generated timestamp of the artifact + command_id : int + The command id of the artifact + data_type_id : int + The data type id of the artifact + params : str + The parameters of the artifact + artifact_type_id : int + The artifact type + filepath_id : int + The filepath id + + Returns + ------- + int + The artifact id + """ + with TRN: + # Add the row in the artifact table + # Magic number 4: Visibility -> sandbox + sql = """INSERT INTO qiita.artifact + (generated_timestamp, command_id, data_type_id, + command_parameters, visibility_id, artifact_type_id, + submitted_to_vamps) + VALUES (%s, %s, %s, %s, %s, %s, %s) + RETURNING artifact_id""" + TRN.add(sql, [a_timestamp, command_id, data_type_id, params, 4, + artifact_type_id, False]) + artifact_id = TRN.execute_fetchlast() + # Link the artifact with its file + sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) + VALUES (%s, %s)""" + TRN.add(sql, [artifact_id, filepath_id]) + # Link the artifact with the analysis + sql = """INSERT INTO qiita.analysis_artifact + (analysis_id, artifact_id) + VALUES (%s, %s)""" + TRN.add(sql, [analysis_id, artifact_id]) + + return artifact_id + + +def create_rarefied_biom_artifact(analysis, srare_cmd_id, biom_data, params, + parent_biom_artifact_id, rarefaction_job_id, + srare_cmd_out_id): + """Creates the rarefied biom artifact + + Parameters + ---------- + analysis : dict + The analysis information + srare_cmd_id : int + The command id of "Single Rarefaction" + biom_data : dict + The biom information + params : str + The processing parameters + parent_biom_artifact_id : int + The parent biom artifact id + rarefaction_job_id : str + The job id of the rarefaction job + srare_cmd_out_id : int + The id of the single rarefaction output 
+ + Returns + ------- + int + The artifact id + """ + with TRN: + # Transfer the file to an artifact + # Magic number 7: artifact type -> biom + artifact_id = transfer_file_to_artifact( + analysis['analysis_id'], analysis['timestamp'], srare_cmd_id, + biom_data['data_type_id'], params, 7, biom_data['filepath_id']) + # Link the artifact with its parent + sql = """INSERT INTO qiita.parent_artifact (artifact_id, parent_id) + VALUES (%s, %s)""" + TRN.add(sql, [artifact_id, parent_biom_artifact_id]) + # Link the artifact as the job output + sql = """INSERT INTO qiita.artifact_output_processing_job + (artifact_id, processing_job_id, command_output_id) + VALUES (%s, %s, %s)""" + TRN.add(sql, [artifact_id, rarefaction_job_id, srare_cmd_out_id]) + return artifact_id + + +def transfer_job(analysis, command_id, params, input_artifact_id, job_data, + cmd_out_id, biom_data, output_artifact_type_id): + """Transfers the job from the old structure to the plugin structure + + Parameters + ---------- + analysis : dict + The analysis information + command_id : int + The id of the command executed + params : str + The parameters used in the job + input_artifact_id : int + The id of the input artifact + job_data : dict + The job information + cmd_out_id : int + The id of the command's output + biom_data : dict + The biom information + output_artifact_type_id : int + The type of the output artifact + """ + with TRN: + # Create the job + # Add the row in the processing job table + # Magic number 3: status -> success + sql = """INSERT INTO qiita.processing_job + (email, command_id, command_parameters, + processing_job_status_id) + VALUES (%s, %s, %s, %s) + RETURNING processing_job_id""" + TRN.add(sql, [analysis['email'], command_id, params, 3]) + job_id = TRN.execute_fetchlast() + + # Link the job with the input artifact + sql = """INSERT INTO qiita.artifact_processing_job + (artifact_id, processing_job_id) + VALUES (rarefied_biom_id, proc_job_id)""" + TRN.add(sql, [input_artifact_id, job_id]) + + # Check if the executed job has results and add them + sql = """SELECT EXISTS(SELECT * + FROM qiita.job_results_filepath + WHERE job_id = %s)""" + TRN.add(sql, [job_data['job_id']]) + if TRN.execute_fetchlast(): + # There are results for the current job. 
+ # Transfer the job files to a new artifact + sql = """SELECT filepath_id + FROM qiita.job_results_filepath + WHERE job_id = %s""" + TRN.add(sql, job_data['job_id']) + filepath_id = TRN.execute_fetchlast() + artifact_id = transfer_file_to_artifact( + analysis['analysis_id'], analysis['timestamp'], command_id, + biom_data['data_type_id'], params, output_artifact_type_id, + filepath_id) + + # Link the artifact with its parent + sql = """INSERT INTO qiita.parent_artifact (artifact_id, parent_id) + VALUES (%s, %s)""" + TRN.add(sql, [artifact_id, input_artifact_id]) + # Link the artifact as the job output + sql = """INSERT INTO qiita.artifact_output_processing_job + (artifact_id, processing_job_id, command_output_id) + VALUES (%s, %s, %s)""" + TRN.add(sql, [artifact_id, job_id, cmd_out_id]) + TRN.exeucte() + else: + # There are no results on the current job, so mark it as + # error + if job_data.log_id is None: + # Magic number 2 - we are not using any other severity + # level, so keep using number 2 + sql = """INSERT INTO qiita.logging (time, severity_id, msg) + VALUES (%s, %s, %s) + RETURNING logging_id""" + TRN.add(sql, [analysis['timestamp'], 2, + "Unknown error - patch 47"]) + else: + log_id = job_data['log_id'] + + # Magic number 4 -> status -> error + sql = """UPDATE qiita.processing_job + SET processing_job_status_id = 4, logging_id = %s + WHERE processing_job_id = %s""" + TRN.add(sql, [log_id, job_id]) + + +# The new commands that we are going to add generate new artifact types. +# These new artifact types are going to be added to a different plugin. +# In interest of time and given that the artifact type system is going to +# change in the near future, we feel that the easiest way to transfer +# the current analyses results is by creating 3 different types of +# artifacts: (1) distance matrix -> which will include the distance matrix, +# the principal coordinates and the emperor plots; (2) rarefaction +# curves -> which will include all the files generated by alpha rarefaction +# and (3) taxonomy summary, which will include all the files generated +# by summarize_taxa_through_plots.py + +with TRN: + # Add the new artifact types + sql = """INSERT INTO qiita.artifact_type ( + artifact_type, description, can_be_submitted_to_ebi, + can_be_submitted_to_vamps) + VALUES (%s, %s, %s, %s) + RETURNING artifact_type_id""" + TRN.add(sql, ['beta_div_plots', 'Qiime 1 beta diversity results', + False, False]) + dm_atype_id = TRN.execute_fetchlast() + TRN.add(sql, ['rarefaction_curves', 'Rarefaction curves', False, False]) + rc_atype_id = TRN.execute_fetchlast() + TRN.add(sql, ['taxa_summary', 'Taxa summary plots', False, False]) + ts_atype_id = TRN.execute_fetchlast() + + # Associate each artifact with the filetypes that it accepts + # At this time we are going to add them as directories, just as it is done + # right now. We can make it fancier with the new type system. + # Magic number 8: the filepath_type_id for the directory + sql = """INSERT INTO qiita.artifact_type_filepath_type + (artifact_type_id, filepath_type_id, required) + VALUES (%s, %s, %s)""" + sql_args = [[dm_atype_id, 8, True], + [rc_atype_id, 8, True], + [ts_atype_id, 8, True]] + TRN.add(sql, sql_args, many=True) + + # Create the new commands that execute the current analyses. In qiita, + # the only commands that where available are Summarize Taxa, Beta + # Diversity and Alpha Rarefaction. 
The system was executing rarefaction + # by default, but it should be a different step in the analysis process + # so we are going to create a command for it too. These commands are going + # to be part of the QIIME plugin, so we are going to first retrieve the + # id of the QIIME 1.9.1 plugin, which for sure exists cause it was added + # in patch 33 and there is no way of removing plugins + + # Step 1: Get the QIIME plugin id + sql = """SELECT software_id + FROM qiita.software + WHERE name = 'QIIME' AND version = '1.9.1'""" + TRN.add(sql) + qiime_id = TRN.execute_fetchlast() + + # Step 2: Insert the new commands in the software_command table + sql = """INSERT INTO qiita.software_command + (software_id, name, description, is_analysis) + VALUES (%s, %s, %s, TRUE) + RETURNING command_id""" + TRN.add(sql, [qiime_id, 'Summarize Taxa', 'Plots taxonomy summaries at ' + 'different taxonomy levels']) + sum_taxa_cmd_id = TRN.execute_fetchlast() + TRN.add(sql, [qiime_id, 'Beta Diversity', + 'Computes and plots beta diversity results']) + bdiv_cmd_id = TRN.execute_fetchlast() + TRN.add(sql, [qiime_id, 'Alpha Rarefaction', + 'Computes and plots alpha rarefaction results']) + arare_cmd_id = TRN.execute_fetchlast() + TRN.add(sql, [qiime_id, 'Single Rarefaction', + 'Rarefies the input table by random sampling without ' + 'replacement']) + srare_cmd_id = TRN.execute_fetchlast() + + # Step 3: Insert the parameters for each command + sql = """INSERT INTO qiita.command_parameter + (command_id, parameter_name, parameter_type, required, + default_value) + VALUES (%s, %s, %s, %s, %s) + RETURNING command_parameter_id""" + sql_args = [ + # Summarize Taxa + (sum_taxa_cmd_id, 'metadata_category', 'string', False, ''), + (sum_taxa_cmd_id, 'sort', 'bool', False, 'False'), + # Beta Diversity + (bdiv_cmd_id, 'tree', 'string', False, ''), + (bdiv_cmd_id, 'metric', + 'choice:["abund_jaccard","binary_chisq","binary_chord",' + '"binary_euclidean","binary_hamming","binary_jaccard",' + '"binary_lennon","binary_ochiai","binary_otu_gain","binary_pearson",' + '"binary_sorensen_dice","bray_curtis","bray_curtis_faith",' + '"bray_curtis_magurran","canberra","chisq","chord","euclidean",' + '"gower","hellinger","kulczynski","manhattan","morisita_horn",' + '"pearson","soergel","spearman_approx","specprof","unifrac",' + '"unifrac_g","unifrac_g_full_tree","unweighted_unifrac",' + '"unweighted_unifrac_full_tree","weighted_normalized_unifrac",' + '"weighted_unifrac"]', False, '"binary_jaccard"'), + # Alpha rarefaction + (arare_cmd_id, 'tree', 'string', False, ''), + (arare_cmd_id, 'num_steps', 'integer', False, 10), + (arare_cmd_id, 'min_rare_depth', 'integer', False, 10), + (arare_cmd_id, 'max_rare_depth', 'integer', False, 'Default'), + (arare_cmd_id, 'metrics', + 'mchoice:["ace","berger_parker_d","brillouin_d","chao1","chao1_ci",' + '"dominance","doubles","enspie","equitability","esty_ci",' + '"fisher_alpha","gini_index","goods_coverage","heip_e",' + '"kempton_taylor_q","margalef","mcintosh_d","mcintosh_e",' + '"menhinick","michaelis_menten_fit","observed_otus",' + '"observed_species","osd","simpson_reciprocal","robbins",' + '"shannon","simpson","simpson_e","singles","strong","PD_whole_tree"]', + False, '["chao1","observed_otus"]'), + # Single rarefaction + (srare_cmd_id, 'depth', 'integer', True, None), + (srare_cmd_id, 'subsample_multinomial', 'bool', False, 'False') + ] + TRN.add(sql, sql_args, many=True) + + TRN.add(sql, [sum_taxa_cmd_id, 'biom_table', 'artifact', True, None]) + sum_taxa_cmd_param_id = TRN.execute_fetchlast() + 
TRN.add(sql, [bdiv_cmd_id, 'biom_table', 'artifact', True, None]) + bdiv_cmd_param_id = TRN.execute_fetchlast() + TRN.add(sql, [arare_cmd_id, 'biom_table', 'artifact', True, None]) + arare_cmd_param_id = TRN.execute_fetchlast() + TRN.add(sql, [srare_cmd_id, 'biom_table', 'artifact', True, None]) + srare_cmd_param_id = TRN.execute_fetchlast() + + # Step 4: Connect the artifact parameters with the artifact types that + # they accept + sql = """SELECT artifact_type_id + FROM qiita.artifact_type + WHERE artifact_type = 'BIOM'""" + TRN.add(sql) + biom_atype_id = TRN.execute_fetchlast() + + sql = """INSERT INTO qiita.parameter_artifact_type + (command_parameter_id, artifact_type_id) + VALUES (%s, %s)""" + sql_args = [[sum_taxa_cmd_param_id, biom_atype_id], + [bdiv_cmd_param_id, biom_atype_id], + [arare_cmd_param_id, biom_atype_id], + [srare_cmd_param_id, biom_atype_id]] + TRN.add(sql, sql_args, many=True) + + # Step 5: Add the outputs of the command. + sql = """INSERT INTO qiita.command_output + (name, command_id, artifact_type_id) + VALUES (%s, %s, %s) + RETURNING command_output_id""" + TRN.add(sql, ['taxa_summary', sum_taxa_cmd_id, ts_atype_id]) + sum_taxa_cmd_out_id = TRN.execute_fetchlast() + TRN.add(sql, ['distance_matrix', bdiv_cmd_id, dm_atype_id]) + bdiv_cmd_out_id = TRN.execute_fetchlast() + TRN.add(sql, ['rarefaction_curves', arare_cmd_id, rc_atype_id]) + arare_cmd_out_id = TRN.execute_fetchlast() + TRN.add(sql, ['rarefied_table', srare_cmd_id, biom_atype_id]) + srare_cmd_out_id = TRN.execute_fetchlast() + + # Step 6: Add default parameter sets + sql = """INSERT INTO qiita.default_parameter_set + (command_id, parameter_set_name, parameter_set) + VALUES (%s, %s, %s)""" + sql_args = [ + [sum_taxa_cmd_id, 'Defaults', + '{"sort": false, "metadata_category": ""}'], + [bdiv_cmd_id, 'Unweighted UniFrac', + '{"metric": "unweighted_unifrac", "tree": ""}'], + [arare_cmd_id, 'Defaults', + '{"max_rare_depth": "Default", "tree": "", "num_steps": 10, ' + '"min_rare_depth": 10, "metrics": ["chao1", "observed_otus"]}'], + [srare_cmd_id, 'Defaults', + '{"subsample_multinomial": "False"}']] + TRN.add(sql, sql_args, many=True) + +# At this point we are ready to start transferring the data from the old +# structures to the new structures. Overview of the procedure: +# Step 1: Add initial set of artifacts up to rarefied table +# Step 2: Transfer the "analisys jobs" to processing jobs and create +# the analysis artifacts +db_dir = get_db_files_base_dir() +with TRN: + sql = "SELECT * FROM qiita.analysis" + TRN.add(sql) + analysis_info = TRN.execute_fetchindex() + + # Loop through all the analysis + for analysis in analysis_info: + # Step 1: Add the inital set of artifacts. An analysis starts with + # a set of BIOM artifacts. 
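Each BIOM table handled below is first checked for prior rarefaction; a minimal standalone sketch of that check (the function name is illustrative, the patch performs the same check inline):

    from biom import load_table

    def table_is_rarefied(biom_fp):
        # A table rarefied to a fixed depth has the same total count for
        # every sample, so the set of per-sample sums has a single element
        depths = set(load_table(biom_fp).sum(axis='sample'))
        return len(depths) == 1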
+ sql = """SELECT * + FROM qiita.analysis_filepath + JOIN qiita.filepath USING (filepath_id) + JOIN qiita.filepath_type USING (filepath_type_id) + WHERE analysis_id = %s AND filepath_type = 'biom'""" + TRN.add(sql, [analysis['analysis_id']]) + analysis_bioms = TRN.execute_fetchindex() + + # Loop through all the biom tables associated with the current analysis + # so we can create the initial set of artifacts + for biom_data in analysis_bioms: + # Get the path of the BIOM table + sql = """SELECT filepath, mountpoint + FROM qiita.filepath + JOIN qiita.data_directory USING (data_directory_id) + WHERE filepath_id = %s""" + TRN.add(sql, [biom_data['filepath_id']]) + # Magic number 0: There is only a single row in the query result + fp_info = TRN.execute_fetchindex()[0] + filepath = join(db_dir, fp_info['mountpoint'], fp_info['filepath']) + + # We need to check if the BIOM table has been rarefied or not + table = load_table(filepath) + depths = set(table.sum(axis='sample')) + if len(depths) == 1: + # The BIOM table was rarefied + # Create the initial unrarefied artifact + initial_biom_artifact_id = create_non_rarefied_biom_artifact( + analysis, biom_data, table) + # Create the rarefaction job + rarefaction_job_id, params = create_rarefaction_job( + depths.pop(), initial_biom_artifact_id, analysis, + srare_cmd_id) + # Create the rarefied artifact + rarefied_biom_artifact_id = create_rarefied_biom_artifact( + analysis, srare_cmd_id, biom_data, params, + initial_biom_artifact_id, rarefaction_job_id, + srare_cmd_out_id) + else: + # The BIOM table was not rarefied, use current table as initial + initial_biom_id = transfer_file_to_artifact( + analysis['analysis_id'], analysis['timestamp'], None, + biom_data['data_type_id'], None, 7, + biom_data['filepath_id']) + + # Loop through all the jobs that used this biom table as input + sql = """SELECT * + FROM qiita.job + WHERE reverse(split_part(reverse( + options::json->>'--otu_table_fp'), '/', 1)) = %s""" + TRN.add(sql, [filepath]) + analysis_jobs = TRN.execute_fetchindex() + for job_data in analysis_jobs: + # Identify which command the current job exeucted + if job_data['command_id'] == 1: + # Taxa summaries + cmd_id = sum_taxa_cmd_id + params = ('{"biom_table":%d,"metadata_category":"",' + '"sort":false}' % initial_biom_id) + output_artifact_type_id = ts_atype_id + cmd_out_id = sum_taxa_cmd_out_id + elif job_data['command_id'] == 2: + # Beta diversity + cmd_id = bdiv_cmd_id + tree_fp = loads(job_data['options'])['--tree_fp'] + if tree_fp: + params = ('{"biom_table":%d,"tree":"%s","metrics":' + '["unweighted_unifrac","weighted_unifrac"]}' + % (initial_biom_id, tree_fp)) + else: + params = ('{"biom_table":%d,"metrics":["bray_curtis",' + '"gower","canberra","pearson"]}' + % initial_biom_id) + output_artifact_type_id = dm_atype_id + cmd_out_id = bdiv_cmd_out_id + else: + # Alpha rarefaction + cmd_id = arare_cmd_id + tree_fp = loads(job_data['options'])['--tree_fp'] + params = ('{"biom_table":%d,"tree":"%s","num_steps":"10",' + '"min_rare_depth":"10",' + '"max_rare_depth":"Default"}' + % (initial_biom_id, tree_fp)) + output_artifact_type_id = rc_atype_id + cmd_out_id = arare_cmd_out_id + + transfer_job(analysis, cmd_id, params, initial_biom_id, + job_data, cmd_out_id, biom_data, + output_artifact_type_id) + +errors = [] +with TRN: + # Unlink the analysis from the biom table filepaths + # Magic number 7 -> biom filepath type + sql = """DELETE FROM qiita.analysis_filepath + WHERE filepath_id IN (SELECT filepath_id + FROM qiita.filepath + WHERE filepath_type_id 
= 7)""" + TRN.add(sql) + TRN.execute() + + # Delete old structures that are not used anymore + tables = ["collection_job", "collection_analysis", "collection_users", + "collection", "collection_status", "analysis_workflow", + "analysis_chain", "analysis_job", "job_results_filepath", "job", + "job_status", "command_data_type", "command", "analysis_status"] + for table in tables: + TRN.add("DROP TABLE qiita.%s" % table) + try: + TRN.execute() + except Exception as e: + errors.append("Error deleting table %s: %s" % (table, str(e))) + +# Purge filepaths +try: + purge_filepaths() +except Exception as e: + errors.append("Error purging filepaths: %s" % str(e)) + +if errors: + print "\n".join(errors) diff --git a/qiita_db/support_files/patches/python_patches/55.py b/qiita_db/support_files/patches/python_patches/55.py new file mode 100644 index 000000000..4dd25bb79 --- /dev/null +++ b/qiita_db/support_files/patches/python_patches/55.py @@ -0,0 +1,21 @@ +from qiita_db.sql_connection import TRN + +sql = """ + SELECT constraint_name AS cname, 'qiita.' || table_name AS tname + FROM information_schema.table_constraints + WHERE constraint_type ='FOREIGN KEY' AND ( + (constraint_name LIKE 'fk_sample_%' AND table_name LIKE 'sample_%') OR + (constraint_name LIKE 'fk_prep_%' AND table_name LIKE 'prep_%')) AND + table_name NOT IN ( + 'prep_template', 'prep_template_sample', 'prep_template_filepath', + 'prep_template_processing_job')""" + +with TRN: + TRN.add(sql) + to_delete = TRN.execute_fetchindex() + +for cname, tname in to_delete: + with TRN: + sql = "ALTER TABLE %s DROP CONSTRAINT %s" % (tname, cname) + TRN.add(sql) + TRN.execute() diff --git a/qiita_db/support_files/qiita-db.dbs b/qiita_db/support_files/qiita-db.dbs index 1f7ebe2f5..84ac07efc 100644 --- a/qiita_db/support_files/qiita-db.dbs +++ b/qiita_db/support_files/qiita-db.dbs @@ -14,7 +14,6 @@ - @@ -25,47 +24,47 @@ false + - - - + + + - - - + + + - - Keeps track of the chain of analysis edits. Tracks what previous analysis a given analysis came from.If a given analysis is not in child_id, it is the root of the chain. - - - - +
+ + + + - - + + - - - + + + - - + + - - + +
@@ -96,31 +95,6 @@
- - Holds information for a one-to-many relation of analysis to the jobs in it - - - - - - - - - - - - - - - - - - - - - - -
Controls what analyses are visible on what portals @@ -138,6 +112,26 @@
+ + + + + + + + + + + + + + + + + + + +
@@ -166,16 +160,6 @@
- - - - - - - - - -
Links analyses to the users they are shared with @@ -197,17 +181,6 @@
- - Stores what step in_production analyses are on. - - - - - - - - -
Represents data in the system @@ -367,101 +340,6 @@
- - Tracks a group of analyses and important jobs for an overarching goal. - - - - - - 1 - - - - - - - - - - - - - - - - -
- - Matches collection to analyses as one to many. - - - - - - - - - - - - - - - - - - -
- - Matches collection important jobs as one to many. - - - - - - - - - - - - - - - - - - -
- - - - - - -
- - Allows sharing of a collection - - - - - - - - - - - - - - - - - - -
Table relates a column with a controlled vocabulary. @@ -500,51 +378,6 @@
- - Available commands for jobs - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - -
@@ -859,96 +692,6 @@
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - Holds connection between jobs and the result filepaths - - - - - - - - - - - - - - - - - - -
- - - - - - - - - -
@@ -1519,6 +1262,9 @@ 'True' + + 'False' + @@ -1879,16 +1625,11 @@ Controlled Vocabulary]]> - - - - - @@ -1896,16 +1637,9 @@ Controlled Vocabulary]]> - - - - - - - @@ -1932,7 +1666,6 @@ Controlled Vocabulary]]> - @@ -1958,8 +1691,6 @@ Controlled Vocabulary]]> - - @@ -1971,21 +1702,13 @@ Controlled Vocabulary]]> + analysis tables - - - - - - - - - @@ -2025,13 +1748,6 @@ Controlled Vocabulary]]> - - - - - - - @@ -2090,7 +1806,7 @@ Controlled Vocabulary]]> + + + + +{% end %} +{% block content %} + +
+
+

+ {{analysis_name}} - ID {{analysis_id}} + +

+

{{analysis_description}}

+ Shared with: +
+
+
+
+
+

- Processing network

+          (Click nodes for more information; blue nodes are jobs)
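A rough sketch of the kind of nodes/edges payload such a network view can be fed; the function and its input shapes are illustrative only, the actual handler and vis.js wiring are not part of this template:

    def build_network_payload(artifacts, jobs):
        """artifacts: iterable of (id, name); jobs: iterable of
        (id, command_name, input_artifact_id)."""
        nodes = [{'id': 'artifact_%d' % a_id, 'label': name, 'group': 'artifact'}
                 for a_id, name in artifacts]
        # job nodes are the blue nodes mentioned above
        nodes += [{'id': 'job_%s' % j_id, 'label': cmd, 'group': 'job'}
                  for j_id, cmd, _ in jobs]
        edges = [{'from': 'artifact_%d' % a_id, 'to': 'job_%s' % j_id}
                 for j_id, _, a_id in jobs]
        return {'nodes': nodes, 'edges': edges}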
+
+
+
+
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+ + + + + +{% end %} diff --git a/qiita_pet/templates/analysis_selected.html b/qiita_pet/templates/analysis_selected.html index f9095d10c..74ff6df01 100644 --- a/qiita_pet/templates/analysis_selected.html +++ b/qiita_pet/templates/analysis_selected.html @@ -133,7 +133,7 @@

Processed Data

-
+ - +
+ + + + + + + + + + {% for analysis in analyses %} + {% set _id = analysis.id %} + + + + + + + + {% end %} + + +
Artifacts | Analysis Name | Timestamp | Mapping File | Delete?
+ {{[ar.id for ar in analysis.artifacts]}} + + {{analysis.name}} + {% if analysis.description %} + ({{analysis.description}}) + {% end %} + + {{ analysis.timestamp.strftime("%m/%d/%y %H:%M:%S")}} + + {% raw mappings[_id] %} + + +
+ +{% end %} + +{% end %} diff --git a/qiita_pet/templates/list_studies.html b/qiita_pet/templates/list_studies.html index d3a4de4ac..6b13975a5 100644 --- a/qiita_pet/templates/list_studies.html +++ b/qiita_pet/templates/list_studies.html @@ -5,11 +5,6 @@ - @@ -18,33 +13,27 @@ -{% end %} - -{%block content %} - -
-

No Analyses available

-

Create an analysis

-
- -{% if analyses %} - - - - - - - - - - - - {% for analysis in analyses %} - {% set status = analysis.status %} - {% set _id = analysis.id %} - {% if status == "in_construction"%} - - - - - - - - - - - - - - - {% end %} -
Analysis Name | Analysis Description | Status | Timestamp | Mapping File | Biom Files | tgz Files | Delete?
- {{analysis.name}} - {% elif status == "running" %} -
- {{analysis.name}} - {% elif status == "completed" %} -
- {{analysis.name}} - {% elif status == "error" %} -
- {{analysis.name}} - {% else %} -
- {{analysis.name}} - {% end %} - - {{analysis.description}} - - {{status}} - - {{ analysis.timestamp.strftime("%m/%d/%y %H:%M:%S")}} - - {% raw mappings[_id] %} - - {% raw bioms[_id] %} - - {% raw tgzs[_id] %} - - -
-{% end %} - -{% end %} diff --git a/qiita_pet/templates/sitebase.html b/qiita_pet/templates/sitebase.html index a5002a869..12a3ccb57 100644 --- a/qiita_pet/templates/sitebase.html +++ b/qiita_pet/templates/sitebase.html @@ -35,6 +35,8 @@ + + {% if portal_styling.custom_css %}