diff --git a/.travis.yml b/.travis.yml
index 5a191929f..d7b5b0555 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,7 +4,9 @@ env:
global:
- PYTHON_VERSION=2.7
matrix:
- - TEST_ADD_STUDIES=False
+ - TEST_ADD_STUDIES=False COVER_PACKAGE=qiita_db
+ - TEST_ADD_STUDIES=False COVER_PACKAGE=qiita_pet
+ - TEST_ADD_STUDIES=False COVER_PACKAGE="qiita_core qiita_ware"
- TEST_ADD_STUDIES=True
before_install:
- redis-server --version
@@ -18,7 +20,9 @@ before_install:
- wget ftp://ftp.microbio.me/pub/qiita/ascp-install-3.5.4.102989-linux-64-qiita.sh -O ascp-install-3.5.4.102989-linux-64-qiita.sh
- chmod +x ascp-install-3.5.4.102989-linux-64-qiita.sh
- ./ascp-install-3.5.4.102989-linux-64-qiita.sh
- - if [ ${TRAVIS_PULL_REQUEST} == "false" ]; then openssl aes-256-cbc -K $encrypted_a2e23aea5f14_key -iv $encrypted_a2e23aea5f14_iv -in qiita_core/support_files/config_test_travis.cfg.enc -out qiita_core/support_files/config_test_travis.cfg -d ; fi
+ # Once we have EBI testing we should uncomment this line;
+ # look for the EBI note in before_script, which is part of this change
+ # - if [ ${TRAVIS_PULL_REQUEST} == "false" ]; then openssl aes-256-cbc -K $encrypted_a2e23aea5f14_key -iv $encrypted_a2e23aea5f14_iv -in qiita_core/support_files/config_test_travis.cfg.enc -out qiita_core/support_files/config_test_travis.cfg -d ; fi
install:
# install a few of the dependencies that pip would otherwise try to install
# when installing scikit-bio
@@ -27,22 +31,37 @@ install:
'pandas>=0.18' 'matplotlib>=1.1.0' 'scipy>0.13.0' 'numpy>=1.7' 'h5py>=2.3.1'
- source activate qiita
- pip install -U pip
- - pip install sphinx sphinx-bootstrap-theme coveralls 'ipython[all]==2.4.1'
+ - pip install sphinx sphinx-bootstrap-theme 'ipython[all]==2.4.1' nose-timer codecov
- travis_retry pip install . --process-dependency-links
- 'echo "backend: Agg" > matplotlibrc'
-script:
+ # Install the biom plugin so we can run the analysis tests
+ - pip install https://github.com/qiita-spots/qiita_client/archive/master.zip
+ - pip install https://github.com/qiita-spots/qtp-biom/archive/master.zip --process-dependency-links
+ - export QIITA_SERVER_CERT=`pwd`/qiita_core/support_files/server.crt
+ - mkdir ~/.qiita_plugins
+ - cp $PWD/qiita_core/support_files/BIOM\ type_2.1.4.conf ~/.qiita_plugins
+before_script:
+ # Some of the tests rely on the plugin system to complete successfully.
+ # Thus, we need a Qiita webserver running to be able to execute the tests.
- export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test.cfg
- - if [ ${TRAVIS_PULL_REQUEST} == "false" ]; then
- export QIITA_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg;
- export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg;
- fi
+ # EBI: see the end of before_install for why this block is commented out
+ # - if [ ${TRAVIS_PULL_REQUEST} == "false" ]; then
+ # export QIITA_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg;
+ # export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test_travis.cfg;
+ # fi
- ipython profile create qiita-general --parallel
- qiita-env start_cluster qiita-general
- qiita-env make --no-load-ontologies
+ - |
+ if [ ${TEST_ADD_STUDIES} == "False" ]; then
+ qiita pet webserver --no-build-docs start &
+ fi
+script:
+ - sleep 5
- if [ ${TEST_ADD_STUDIES} == "True" ]; then test_data_studies/commands.sh ; fi
- if [ ${TEST_ADD_STUDIES} == "True" ]; then qiita-cron-job ; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then qiita-test-install ; fi
- - if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage -v --cover-package=qiita_db,qiita_pet,qiita_core,qiita_ware; fi
+ - if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests $COVER_PACKAGE --with-doctest --with-coverage --with-timer -v --cover-package=$COVER_PACKAGE; fi
- flake8 qiita_* setup.py scripts/*
- ls -R /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_pet/support_files/doc/
- qiita pet webserver
@@ -52,4 +71,4 @@ services:
- redis-server
- postgresql
after_success:
- - if [ ${TEST_ADD_STUDIES} == "False" ]; then coveralls ; fi
+ - if [ ${TEST_ADD_STUDIES} == "False" ]; then codecov ; fi
diff --git a/README.rst b/README.rst
index bf3213ab6..770e2f3ec 100644
--- a/README.rst
+++ b/README.rst
@@ -80,7 +80,7 @@ future.
.. |Build Status| image:: https://travis-ci.org/biocore/qiita.png?branch=master
:target: https://travis-ci.org/biocore/qiita
-.. |Coverage Status| image:: https://coveralls.io/repos/biocore/qiita/badge.png?branch=master
- :target: https://coveralls.io/r/biocore/qiita
+.. |Coverage Status| image:: https://codecov.io/gh/biocore/qiita/branch/master/graph/badge.svg
+ :target: https://codecov.io/gh/biocore/qiita
.. |Gitter| image:: https://badges.gitter.im/Join%20Chat.svg
:target: https://gitter.im/biocore/qiita?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
diff --git a/qiita_core/support_files/BIOM type_2.1.4.conf b/qiita_core/support_files/BIOM type_2.1.4.conf
new file mode 100644
index 000000000..d3eb040b4
--- /dev/null
+++ b/qiita_core/support_files/BIOM type_2.1.4.conf
@@ -0,0 +1,13 @@
+[main]
+NAME = BIOM type
+VERSION = 2.1.4
+DESCRIPTION = The Biological Observation Matrix format
+ENVIRONMENT_SCRIPT = source activate qtp-biom
+START_SCRIPT = start_biom
+PLUGIN_TYPE = artifact definition
+PUBLICATIONS =
+
+[oauth2]
+SERVER_CERT = /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_core/support_files/server.crt
+CLIENT_ID = dHgaXDwq665ksFPqfIoD3Jt8KRXdSioTRa4lGa5mGDnz6JTIBf
+CLIENT_SECRET = xqx61SD4M2EWbaS0WYv3H1nIemkvEAMIn16XMLjy5rTCqi7opCcWbfLINEwtV48bQ
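The file above is a standard INI plugin configuration. As a point of reference, here is a minimal sketch of how a plugin process could load it with Python 2's ConfigParser; the `read_plugin_conf` helper is hypothetical and not part of Qiita's API, but the section and option names match the file above:

```python
# Hypothetical reader for a plugin conf such as "BIOM type_2.1.4.conf".
# Section/option names match the file above; this helper is illustrative
# and not part of Qiita's API.
from ConfigParser import ConfigParser  # Python 2.7, as used by Qiita


def read_plugin_conf(conf_fp):
    parser = ConfigParser()
    parser.read(conf_fp)
    return {
        'name': parser.get('main', 'NAME'),
        'version': parser.get('main', 'VERSION'),
        'env_script': parser.get('main', 'ENVIRONMENT_SCRIPT'),
        'start_script': parser.get('main', 'START_SCRIPT'),
        'client_id': parser.get('oauth2', 'CLIENT_ID'),
        'client_secret': parser.get('oauth2', 'CLIENT_SECRET'),
    }
```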
diff --git a/qiita_core/support_files/config_test.cfg b/qiita_core/support_files/config_test.cfg
index 0e91b9176..aeb53ef6e 100644
--- a/qiita_core/support_files/config_test.cfg
+++ b/qiita_core/support_files/config_test.cfg
@@ -23,19 +23,19 @@ LOG_DIR =
REQUIRE_APPROVAL = True
# Base URL: DO NOT ADD TRAILING SLASH
-BASE_URL = https://localhost
+BASE_URL = https://localhost:21174
# Download path files
-UPLOAD_DATA_DIR = /tmp/
+UPLOAD_DATA_DIR = /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_db/support_files/test_data/uploads/
# Working directory path
-WORKING_DIR = /tmp/
+WORKING_DIR = /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_db/support_files/test_data/working_dir/
# Maximum upload size (in Gb)
MAX_UPLOAD_SIZE = 100
# Path to the base directory where the data files are going to be stored
-BASE_DATA_DIR =
+BASE_DATA_DIR = /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_db/support_files/test_data/
# Valid upload extension, comma separated. Empty for no uploads
VALID_UPLOAD_EXTENSION = fastq,fastq.gz,txt,tsv,sff,fna,qual
diff --git a/qiita_core/testing.py b/qiita_core/testing.py
index e05f67a69..22bc9201c 100644
--- a/qiita_core/testing.py
+++ b/qiita_core/testing.py
@@ -43,9 +43,9 @@ def wait_for_prep_information_job(prep_id, raise_if_none=True):
else:
redis_info = loads(r_client.get(job_id))
while redis_info['status_msg'] == 'Running':
- sleep(0.05)
+ sleep(0.5)
redis_info = loads(r_client.get(job_id))
- sleep(0.05)
+ sleep(0.5)
def wait_for_processing_job(job_id):
@@ -58,5 +58,5 @@ def wait_for_processing_job(job_id):
"""
job = ProcessingJob(job_id)
while job.status not in ('success', 'error'):
- sleep(0.05)
- sleep(0.5)
+ sleep(1.2)
+ sleep(1.2)
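The longer sleep intervals trade polling latency for fewer Redis/DB round trips on Travis. The underlying pattern, sketched with an explicit timeout so a stuck job fails the test instead of hanging it (hypothetical helper, not part of qiita_core.testing):

```python
from time import sleep, time


def wait_until(predicate, interval=1.2, timeout=120):
    # Poll `predicate` every `interval` seconds until it returns True,
    # raising instead of waiting forever if the job never finishes.
    deadline = time() + timeout
    while not predicate():
        if time() > deadline:
            raise RuntimeError("Timed out waiting for condition")
        sleep(interval)

# e.g. wait_until(lambda: job.status in ('success', 'error'))
```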
diff --git a/qiita_db/__init__.py b/qiita_db/__init__.py
index e606bfb83..e9e71d654 100644
--- a/qiita_db/__init__.py
+++ b/qiita_db/__init__.py
@@ -16,7 +16,6 @@
import environment_manager
import exceptions
import investigation
-import job
import logger
import meta_util
import ontology
@@ -27,11 +26,12 @@
import study
import user
import processing_job
+import private
__version__ = "0.2.0-dev"
__all__ = ["analysis", "artifact", "base", "commands", "environment_manager",
- "exceptions", "investigation", "job", "logger", "meta_util",
+ "exceptions", "investigation", "logger", "meta_util",
"ontology", "portal", "reference", "search",
"software", "sql_connection", "study", "user", "util",
- "metadata_template", "processing_job"]
+ "metadata_template", "processing_job", "private"]
diff --git a/qiita_db/analysis.py b/qiita_db/analysis.py
index d4d957429..aa66f9f39 100644
--- a/qiita_db/analysis.py
+++ b/qiita_db/analysis.py
@@ -18,12 +18,12 @@
# -----------------------------------------------------------------------------
from __future__ import division
from itertools import product
-from os.path import join, basename
-from tarfile import open as taropen
+from os.path import join
from future.utils import viewitems
from biom import load_table
from biom.util import biom_open
+from re import sub
import pandas as pd
from qiita_core.exceptions import IncompetentQiitaDeveloperError
@@ -31,7 +31,7 @@
import qiita_db as qdb
-class Analysis(qdb.base.QiitaStatusObject):
+class Analysis(qdb.base.QiitaObject):
"""
Analysis object to access to the Qiita Analysis information
@@ -41,15 +41,11 @@ class Analysis(qdb.base.QiitaStatusObject):
name
description
samples
- dropped_samples
data_types
- biom_tables
- step
+ artifacts
shared_with
jobs
pmid
- parent
- children
Methods
-------
@@ -63,25 +59,14 @@ class Analysis(qdb.base.QiitaStatusObject):
exists
create
delete
+ add_artifact
+ set_error
"""
_table = "analysis"
_portal_table = "analysis_portal"
_analysis_id_column = 'analysis_id'
- def _lock_check(self):
- """Raises QiitaDBStatusError if analysis is not in_progress"""
- if self.check_status({"queued", "running", "public", "completed",
- "error"}):
- raise qdb.exceptions.QiitaDBStatusError("Analysis is locked!")
-
- def _status_setter_checks(self):
- r"""Perform a check to make sure not setting status away from public
- """
- if self.check_status({"public"}):
- raise qdb.exceptions.QiitaDBStatusError(
- "Can't set status away from public!")
-
@classmethod
def get_by_status(cls, status):
"""Returns all Analyses with given status
@@ -97,19 +82,34 @@ def get_by_status(cls, status):
All analyses in the database with the given status
"""
with qdb.sql_connection.TRN:
- sql = """SELECT analysis_id
- FROM qiita.{0}
- JOIN qiita.{0}_status USING (analysis_status_id)
- JOIN qiita.analysis_portal USING (analysis_id)
- JOIN qiita.portal_type USING (portal_type_id)
- WHERE status = %s AND portal = %s""".format(cls._table)
- qdb.sql_connection.TRN.add(sql, [status, qiita_config.portal])
+ # Sandboxed analyses are the analyses that have not been started
+ # and hence they don't have an artifact yet
+ if status == 'sandbox':
+ sql = """SELECT DISTINCT analysis
+ FROM qiita.analysis
+ JOIN qiita.analysis_portal USING (analysis_id)
+ JOIN qiita.portal_type USING (portal_type_id)
+ WHERE portal = %s AND analysis_id NOT IN (
+ SELECT analysis_id
+ FROM qiita.analysis_artifact)"""
+ qdb.sql_connection.TRN.add(sql, [qiita_config.portal])
+ else:
+ sql = """SELECT DISTINCT analysis_id
+ FROM qiita.analysis_artifact
+ JOIN qiita.artifact USING (artifact_id)
+ JOIN qiita.visibility USING (visibility_id)
+ JOIN qiita.analysis_portal USING (analysis_id)
+ JOIN qiita.portal_type USING (portal_type_id)
+ WHERE visibility = %s AND portal = %s"""
+ qdb.sql_connection.TRN.add(sql, [status, qiita_config.portal])
+
return set(
cls(aid)
for aid in qdb.sql_connection.TRN.execute_fetchflatten())
@classmethod
- def create(cls, owner, name, description, parent=None, from_default=False):
+ def create(cls, owner, name, description, from_default=False,
+ merge_duplicated_sample_ids=False):
"""Creates a new analysis on the database
Parameters
@@ -120,49 +120,39 @@ def create(cls, owner, name, description, parent=None, from_default=False):
Name of the analysis
description : str
Description of the analysis
- parent : Analysis object, optional
- The analysis this one was forked from
from_default : bool, optional
If True, use the default analysis to populate selected samples.
Default False.
+ merge_duplicated_sample_ids : bool, optional
+ Whether the duplicated sample ids in the selected studies should be
+ merged or prepended with the artifact ids. False (default) prepends
+ the artifact id.
+
+ Returns
+ -------
+ qdb.analysis.Analysis
+ The newly created analysis
"""
with qdb.sql_connection.TRN:
- status_id = qdb.util.convert_to_id(
- 'in_construction', 'analysis_status', 'status')
portal_id = qdb.util.convert_to_id(
qiita_config.portal, 'portal_type', 'portal')
+ # Create the row in the analysis table
+ sql = """INSERT INTO qiita.{0}
+ (email, name, description)
+ VALUES (%s, %s, %s)
+ RETURNING analysis_id""".format(cls._table)
+ qdb.sql_connection.TRN.add(
+ sql, [owner.id, name, description])
+ a_id = qdb.sql_connection.TRN.execute_fetchlast()
+
if from_default:
- # insert analysis and move samples into that new analysis
+ # Move samples into that new analysis
dflt_id = owner.default_analysis.id
-
- sql = """INSERT INTO qiita.{0}
- (email, name, description, analysis_status_id)
- VALUES (%s, %s, %s, %s)
- RETURNING analysis_id""".format(cls._table)
- qdb.sql_connection.TRN.add(
- sql, [owner.id, name, description, status_id])
- a_id = qdb.sql_connection.TRN.execute_fetchlast()
- # MAGIC NUMBER 3: command selection step
- # needed so we skip the sample selection step
- sql = """INSERT INTO qiita.analysis_workflow
- (analysis_id, step)
- VALUES (%s, %s)"""
- qdb.sql_connection.TRN.add(sql, [a_id, 3])
-
sql = """UPDATE qiita.analysis_sample
SET analysis_id = %s
WHERE analysis_id = %s"""
qdb.sql_connection.TRN.add(sql, [a_id, dflt_id])
- else:
- # insert analysis information into table as "in construction"
- sql = """INSERT INTO qiita.{0}
- (email, name, description, analysis_status_id)
- VALUES (%s, %s, %s, %s)
- RETURNING analysis_id""".format(cls._table)
- qdb.sql_connection.TRN.add(
- sql, [owner.id, name, description, status_id])
- a_id = qdb.sql_connection.TRN.execute_fetchlast()
# Add to both QIITA and given portal (if not QIITA)
sql = """INSERT INTO qiita.analysis_portal
@@ -176,14 +166,28 @@ def create(cls, owner, name, description, parent=None, from_default=False):
args.append([a_id, qp_id])
qdb.sql_connection.TRN.add(sql, args, many=True)
- # add parent if necessary
- if parent:
- sql = """INSERT INTO qiita.analysis_chain
- (parent_id, child_id)
- VALUES (%s, %s)"""
- qdb.sql_connection.TRN.add(sql, [parent.id, a_id])
+ instance = cls(a_id)
+
+ # Once the analysis is created, we can create the mapping file and
+ # the initial set of artifacts
+ plugin = qdb.software.Software.from_name_and_version(
+ 'Qiita', 'alpha')
+ cmd = plugin.get_command('build_analysis_files')
+ params = qdb.software.Parameters.load(
+ cmd, values_dict={
+ 'analysis': a_id,
+ 'merge_dup_sample_ids': merge_duplicated_sample_ids})
+ job = qdb.processing_job.ProcessingJob.create(
+ owner, params)
+ sql = """INSERT INTO qiita.analysis_processing_job
+ (analysis_id, processing_job_id)
+ VALUES (%s, %s)"""
+ qdb.sql_connection.TRN.add(sql, [a_id, job.id])
+ qdb.sql_connection.TRN.execute()
- return cls(a_id)
+ # Doing the submission outside of the transaction
+ job.submit()
+ return instance
@classmethod
def delete(cls, _id):
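With this change Analysis.create no longer takes a parent; it creates the analysis row, queues the 'build_analysis_files' job, and returns immediately. A usage sketch, illustrative only (the owner email is the standard test user):

```python
# Usage sketch for the new Analysis.create signature; illustrative only.
import qiita_db as qdb

owner = qdb.user.User('test@foo.bar')  # standard test user
analysis = qdb.analysis.Analysis.create(
    owner, 'My analysis', 'Testing the new creation flow',
    from_default=True, merge_duplicated_sample_ids=False)
# The mapping file and initial artifacts are built asynchronously by the
# queued job, so the new analysis starts without artifacts ('sandbox').
print(analysis.id)
```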
@@ -204,15 +208,21 @@ def delete(cls, _id):
if not cls.exists(_id):
raise qdb.exceptions.QiitaDBUnknownIDError(_id, "analysis")
+ # Check if the analysis has any artifact
+ sql = """SELECT EXISTS(SELECT *
+ FROM qiita.analysis_artifact
+ WHERE analysis_id = %s)"""
+ qdb.sql_connection.TRN.add(sql, [_id])
+ if qdb.sql_connection.TRN.execute_fetchlast():
+ raise qdb.exceptions.QiitaDBOperationNotPermittedError(
+ "Can't delete analysis %d, has artifacts attached"
+ % _id)
+
sql = "DELETE FROM qiita.analysis_filepath WHERE {0} = %s".format(
cls._analysis_id_column)
args = [_id]
qdb.sql_connection.TRN.add(sql, args)
- sql = "DELETE FROM qiita.analysis_workflow WHERE {0} = %s".format(
- cls._analysis_id_column)
- qdb.sql_connection.TRN.add(sql, args)
-
sql = "DELETE FROM qiita.analysis_portal WHERE {0} = %s".format(
cls._analysis_id_column)
qdb.sql_connection.TRN.add(sql, args)
@@ -221,7 +231,7 @@ def delete(cls, _id):
cls._analysis_id_column)
qdb.sql_connection.TRN.add(sql, args)
- sql = """DELETE FROM qiita.collection_analysis
+ sql = """DELETE FROM qiita.analysis_processing_job
WHERE {0} = %s""".format(cls._analysis_id_column)
qdb.sql_connection.TRN.add(sql, args)
@@ -259,7 +269,6 @@ def exists(cls, analysis_id):
qdb.sql_connection.TRN.add(sql, [analysis_id, qiita_config.portal])
return qdb.sql_connection.TRN.execute_fetchlast()
- # ---- Properties ----
@property
def owner(self):
"""The owner of the analysis
@@ -346,7 +355,6 @@ def description(self, description):
Analysis is public
"""
with qdb.sql_connection.TRN:
- self._lock_check()
sql = """UPDATE qiita.{0} SET description = %s
WHERE analysis_id = %s""".format(self._table)
qdb.sql_connection.TRN.add(sql, [description, self._id])
@@ -370,34 +378,6 @@ def samples(self):
qdb.sql_connection.TRN.add(sql, [self._id])
return dict(qdb.sql_connection.TRN.execute_fetchindex())
- @property
- def dropped_samples(self):
- """The samples that were selected but dropped in processing
-
- Returns
- -------
- dict of sets
- Format is {artifact_id: {sample_id, sample_id, ...}, ...}
- """
- with qdb.sql_connection.TRN:
- bioms = self.biom_tables
- if not bioms:
- return {}
-
- # get all samples selected for the analysis, converting lists to
- # sets for fast searching. Overhead less this way
- # for large analyses
- all_samples = {k: set(v) for k, v in viewitems(self.samples)}
-
- for biom, filepath in viewitems(bioms):
- table = load_table(filepath)
- ids = set(table.ids())
- for k in all_samples:
- all_samples[k] = all_samples[k] - ids
-
- # what's left are unprocessed samples, so return
- return all_samples
-
@property
def data_types(self):
"""Returns all data types used in the analysis
@@ -434,57 +414,14 @@ def shared_with(self):
for uid in qdb.sql_connection.TRN.execute_fetchflatten()]
@property
- def all_associated_filepath_ids(self):
- """Get all associated filepath_ids
-
- Returns
- -------
- list
- """
+ def artifacts(self):
with qdb.sql_connection.TRN:
- sql = """SELECT filepath_id
- FROM qiita.filepath
- JOIN qiita.analysis_filepath USING (filepath_id)
+ sql = """SELECT artifact_id
+ FROM qiita.analysis_artifact
WHERE analysis_id = %s"""
- qdb.sql_connection.TRN.add(sql, [self._id])
- filepaths = set(qdb.sql_connection.TRN.execute_fetchflatten())
-
- sql = """SELECT filepath_id
- FROM qiita.analysis_job
- JOIN qiita.job USING (job_id)
- JOIN qiita.job_results_filepath USING (job_id)
- JOIN qiita.filepath USING (filepath_id)
- WHERE analysis_id = %s"""
- qdb.sql_connection.TRN.add(sql, [self._id])
- return filepaths.union(
- qdb.sql_connection.TRN.execute_fetchflatten())
-
- @property
- def biom_tables(self):
- """The biom tables of the analysis
-
- Returns
- -------
- dict
- Dictonary in the form {data_type: full BIOM filepath}
- """
- fps = [(_id, fp) for _id, fp, ftype in qdb.util.retrieve_filepaths(
- "analysis_filepath", "analysis_id", self._id)
- if ftype == 'biom']
-
- if fps:
- fps_ids = [f[0] for f in fps]
- with qdb.sql_connection.TRN:
- sql = """SELECT filepath_id, data_type FROM qiita.filepath
- JOIN qiita.analysis_filepath USING (filepath_id)
- JOIN qiita.data_type USING (data_type_id)
- WHERE filepath_id IN %s"""
- qdb.sql_connection.TRN.add(sql, [tuple(fps_ids)])
- data_types = dict(qdb.sql_connection.TRN.execute_fetchindex())
-
- return {data_types[_id]: f for _id, f in fps}
- else:
- return {}
+ qdb.sql_connection.TRN.add(sql, [self.id])
+ return [qdb.artifact.Artifact(aid)
+ for aid in qdb.sql_connection.TRN.execute_fetchflatten()]
@property
def mapping_file(self):
@@ -524,65 +461,21 @@ def tgz(self):
else:
return None
- @property
- def step(self):
- """Returns the current step of the analysis
-
- Returns
- -------
- str
- The current step of the analysis
-
- Raises
- ------
- ValueError
- If the step is not set up
- """
- with qdb.sql_connection.TRN:
- self._lock_check()
- sql = """SELECT step FROM qiita.analysis_workflow
- WHERE analysis_id = %s"""
- qdb.sql_connection.TRN.add(sql, [self._id])
- try:
- return qdb.sql_connection.TRN.execute_fetchlast()
- except IndexError:
- raise ValueError("Step not set yet!")
-
- @step.setter
- def step(self, value):
- with qdb.sql_connection.TRN:
- self._lock_check()
- sql = """SELECT EXISTS(
- SELECT analysis_id
- FROM qiita.analysis_workflow
- WHERE analysis_id = %s)"""
- qdb.sql_connection.TRN.add(sql, [self._id])
- step_exists = qdb.sql_connection.TRN.execute_fetchlast()
-
- if step_exists:
- sql = """UPDATE qiita.analysis_workflow SET step = %s
- WHERE analysis_id = %s"""
- else:
- sql = """INSERT INTO qiita.analysis_workflow
- (step, analysis_id)
- VALUES (%s, %s)"""
- qdb.sql_connection.TRN.add(sql, [value, self._id])
- qdb.sql_connection.TRN.execute()
-
@property
def jobs(self):
- """A list of jobs included in the analysis
+ """The jobs generating the initial artifacts for the analysis
Returns
-------
- list of qiita_db.job.Job
+ list of qiita_db.processing_job.ProcessingJob
Job ids for jobs in analysis. Empty list if no jobs attached.
"""
with qdb.sql_connection.TRN:
- sql = """SELECT job_id FROM qiita.analysis_job
- WHERE analysis_id = %s""".format(self._table)
+ sql = """SELECT processing_job_id
+ FROM qiita.analysis_processing_job
+ WHERE analysis_id = %s"""
qdb.sql_connection.TRN.add(sql, [self._id])
- return [qdb.job.Job(jid)
+ return [qdb.processing_job.ProcessingJob(jid)
for jid in qdb.sql_connection.TRN.execute_fetchflatten()]
@property
@@ -619,22 +512,66 @@ def pmid(self, pmid):
An analysis should only ever have one PMID attached to it.
"""
with qdb.sql_connection.TRN:
- self._lock_check()
sql = """UPDATE qiita.{0} SET pmid = %s
WHERE analysis_id = %s""".format(self._table)
qdb.sql_connection.TRN.add(sql, [pmid, self._id])
qdb.sql_connection.TRN.execute()
- # @property
- # def parent(self):
- # """Returns the id of the parent analysis this was forked from"""
- # return QiitaDBNotImplementedError()
+ @property
+ def can_be_publicized(self):
+ """Returns whether the analysis can be made public
+
+ Returns
+ -------
+ bool
+ Whether the analysis can be publicized or not
+ """
+ # The analysis can be made public only if all the artifacts
+ # from which the samples were taken are public
+ with qdb.sql_connection.TRN:
+ sql = """SELECT DISTINCT artifact_id
+ FROM qiita.analysis_sample
+ WHERE analysis_id = %s"""
+ qdb.sql_connection.TRN.add(sql, [self.id])
+ return all(
+ [qdb.artifact.Artifact(aid).visibility == 'public'
+ for aid in qdb.sql_connection.TRN.execute_fetchflatten()])
+
+ def add_artifact(self, artifact):
+ """Adds an artifact to the analysis
+
+ Parameters
+ ----------
+ artifact : qiita_db.artifact.Artifact
+ The artifact to be added
+ """
+ with qdb.sql_connection.TRN:
+ sql = """INSERT INTO qiita.analysis_artifact
+ (analysis_id, artifact_id)
+ SELECT %s, %s
+ WHERE NOT EXISTS(SELECT *
+ FROM qiita.analysis_artifact
+ WHERE analysis_id = %s
+ AND artifact_id = %s)"""
+ qdb.sql_connection.TRN.add(sql, [self.id, artifact.id,
+ self.id, artifact.id])
- # @property
- # def children(self):
- # return QiitaDBNotImplementedError()
+ def set_error(self, error_msg):
+ """Sets the analysis error
+
+ Parameters
+ ----------
+ error_msg : str
+ The error message
+ """
+ with qdb.sql_connection.TRN:
+ le = qdb.logger.LogEntry.create('Runtime', error_msg)
+ sql = """UPDATE qiita.analysis
+ SET logging_id = %s
+ WHERE analysis_id = %s"""
+ qdb.sql_connection.TRN.add(sql, [le.id, self.id])
+ qdb.sql_connection.TRN.execute()
- # ---- Functions ----
def has_access(self, user):
"""Returns whether the given user has access to the analysis
@@ -656,6 +593,24 @@ def has_access(self, user):
return self in Analysis.get_by_status('public') | \
user.private_analyses | user.shared_analyses
+ def can_edit(self, user):
+ """Returns whether the given user can edit the analysis
+
+ Parameters
+ ----------
+ user : User object
+ User we are checking edit permissions for
+
+ Returns
+ -------
+ bool
+ Whether the user can edit the analysis or not
+ """
+ # The analysis is editable only if the user is the owner, is in the
+ # shared list, or is an admin
+ return (user.level in {'superuser', 'admin'} or self.owner == user or
+ user in self.shared_with)
+
def summary_data(self):
"""Return number of studies, artifacts, and samples selected
@@ -707,6 +662,21 @@ def unshare(self, user):
qdb.sql_connection.TRN.add(sql, [self._id, user.id])
qdb.sql_connection.TRN.execute()
+ def _lock_samples(self):
+ """Only dflt analyses can have samples added/removed
+
+ Raises
+ ------
+ qiita_db.exceptions.QiitaDBOperationNotPermittedError
+ If the analysis is not a default analysis
+ """
+ with qdb.sql_connection.TRN:
+ sql = "SELECT dflt FROM qiita.analysis WHERE analysis_id = %s"
+ qdb.sql_connection.TRN.add(sql, [self.id])
+ if not qdb.sql_connection.TRN.execute_fetchlast():
+ raise qdb.exceptions.QiitaDBOperationNotPermittedError(
+ "Can't add/remove samples from this analysis")
+
def add_samples(self, samples):
"""Adds samples to the analysis
@@ -717,7 +687,7 @@ def add_samples(self, samples):
{artifact_id: [sample1, sample2, ...], ...}
"""
with qdb.sql_connection.TRN:
- self._lock_check()
+ self._lock_samples()
for aid, samps in viewitems(samples):
# get previously selected samples for aid and filter them out
@@ -755,7 +725,7 @@ def remove_samples(self, artifacts=None, samples=None):
artifacts
"""
with qdb.sql_connection.TRN:
- self._lock_check()
+ self._lock_samples()
if artifacts and samples:
sql = """DELETE FROM qiita.analysis_sample
WHERE analysis_id = %s
@@ -781,79 +751,29 @@ def remove_samples(self, artifacts=None, samples=None):
qdb.sql_connection.TRN.add(sql, args, many=True)
qdb.sql_connection.TRN.execute()
- def generate_tgz(self):
- with qdb.sql_connection.TRN:
- fps_ids = self.all_associated_filepath_ids
- if not fps_ids:
- raise qdb.exceptions.QiitaDBError(
- "The analysis %s do not have files attached, "
- "can't create the tgz file" % self.id)
-
- sql = """SELECT filepath, data_directory_id FROM qiita.filepath
- WHERE filepath_id IN %s"""
- qdb.sql_connection.TRN.add(sql, [tuple(fps_ids)])
-
- full_fps = [join(qdb.util.get_mountpoint_path_by_id(mid), f)
- for f, mid in
- qdb.sql_connection.TRN.execute_fetchindex()]
-
- _, analysis_mp = qdb.util.get_mountpoint('analysis')[0]
- tgz = join(analysis_mp, '%d_files.tgz' % self.id)
- try:
- with taropen(tgz, "w:gz") as tar:
- for f in full_fps:
- tar.add(f, arcname=basename(f))
- error_txt = ''
- return_value = 0
- except Exception as e:
- error_txt = str(e)
- return_value = 1
-
- if return_value == 0:
- self._add_file(tgz, 'tgz')
-
- return '', error_txt, return_value
-
- def build_files(self,
- rarefaction_depth=None,
- merge_duplicated_sample_ids=False):
+ def build_files(self, merge_duplicated_sample_ids):
"""Builds biom and mapping files needed for analysis
Parameters
----------
- rarefaction_depth : int, optional
- Defaults to ``None``. If ``None``, do not rarefy. Otherwise, rarefy
- all samples to this number of observations
- merge_duplicated_sample_ids : bool, optional
+ merge_duplicated_sample_ids : bool
If the duplicated sample ids in the selected studies should be
- merged or prepended with the artifact ids. False (default) prepends
+ merged or prepended with the artifact ids. If False, prepends
the artifact id
- Raises
- ------
- TypeError
- If `rarefaction_depth` is not an integer
- ValueError
- If `rarefaction_depth` is less than or equal to zero
-
Notes
-----
Creates biom tables for each requested data type
Creates mapping file for requested samples
"""
with qdb.sql_connection.TRN:
- if rarefaction_depth is not None:
- if type(rarefaction_depth) is not int:
- raise TypeError("rarefaction_depth must be in integer")
- if rarefaction_depth <= 0:
- raise ValueError(
- "rarefaction_depth must be greater than 0")
-
# in practice we could retrieve samples in each of the following
# calls but this will mean calling the DB multiple times and will
# make testing much harder as we will need to have analyses at
# different stages and possible errors.
samples = self.samples
+ # getting the info of all the artifacts to save SQL time
+ bioms_info = qdb.util.get_artifacts_information(samples.keys())
# figuring out if we are going to have duplicated samples, again
# doing it here cause it's computational cheaper
@@ -863,20 +783,29 @@ def build_files(self,
# are going to create
rename_dup_samples = False
grouped_samples = {}
- for k, v in viewitems(samples):
- a = qdb.artifact.Artifact(k)
- p = a.processing_parameters
- if p is not None and p.command is not None:
- ref = (str(p.values['reference'])
- if 'reference' in p.values else 'na')
- cid = str(p.command.id)
+ for aid, asamples in viewitems(samples):
+ # find the artifact info; [0]: there should be only one match
+ ainfo = [bi for bi in bioms_info
+ if bi['artifact_id'] == aid][0]
+
+ data_type = ainfo['data_type']
+ # algorithm is: processing_method | parent_processing, just
+ # keeping processing_method
+ algorithm = ainfo['algorithm'].split('|')[0].strip()
+ files = ainfo['files']
+
+ l = "%s || %s" % (data_type, algorithm)
+ # deblur special case, we need to account for file name
+ if 'deblur-workflow' in algorithm:
+ # [0] there is always just one biom
+ l += " || %s" % [f for f in files
+ if f.endswith('.biom')][0]
else:
- ref = 'na'
- cid = 'na'
- l = "%s.%s.%s" % (a.data_type, ref, cid)
+ l += " ||"
+
if l not in grouped_samples:
grouped_samples[l] = []
- grouped_samples[l].append((k, v))
+ grouped_samples[l].append((aid, asamples))
# 2. if rename_dup_samples is still False, make sure that we don't
# need to rename samples by checking that there are not
# duplicated samples per group
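The grouping key above is the string 'data_type || algorithm', plus the biom file name in the deblur case. Its construction, isolated with made-up artifact info (field names follow the loop above):

```python
# Isolated sketch of the grouping key built above, with made-up artifact
# info; artifacts sharing a key are merged into a single BIOM table.
ainfo = {'data_type': '16S',
         'algorithm': 'deblur-workflow | split libraries',
         'files': ['deblur.biom', 'deblur.seqs.fa']}

algorithm = ainfo['algorithm'].split('|')[0].strip()
label = "%s || %s" % (ainfo['data_type'], algorithm)
if 'deblur-workflow' in algorithm:
    # deblur special case: the biom file name disambiguates the tables
    label += " || %s" % [f for f in ainfo['files']
                         if f.endswith('.biom')][0]
else:
    label += " ||"
print(label)  # 16S || deblur-workflow || deblur.biom
```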
@@ -897,18 +826,21 @@ def build_files(self,
dup_samples = dup_samples | s
self._build_mapping_file(samples, rename_dup_samples)
- self._build_biom_tables(grouped_samples, rarefaction_depth,
- rename_dup_samples)
+ biom_files = self._build_biom_tables(
+ grouped_samples, rename_dup_samples)
- def _build_biom_tables(self, grouped_samples, rarefaction_depth=None,
- rename_dup_samples=False):
+ return biom_files
+
+ def _build_biom_tables(self, grouped_samples, rename_dup_samples=False):
"""Build tables and add them to the analysis"""
with qdb.sql_connection.TRN:
base_fp = qdb.util.get_work_base_dir()
- _, base_fp = qdb.util.get_mountpoint(self._table)[0]
+ biom_files = []
for label, tables in viewitems(grouped_samples):
- data_type, reference_id, command_id = label.split('.')
+ data_type, algorithm, files = [
+ l.strip() for l in label.split('||')]
+
new_table = None
artifact_ids = []
for aid, samples in tables:
@@ -955,31 +887,19 @@ def _build_biom_tables(self, grouped_samples, rarefaction_depth=None,
raise RuntimeError("All samples filtered out from "
"analysis due to rarefaction level")
- # add the metadata column for study the samples come from,
- # this is useful in case the user download the bioms
- study_md = {'study': artifact.study.title,
- 'artifact_ids': ', '.join(artifact_ids),
- 'reference_id': reference_id,
- 'command_id': command_id}
- samples_md = {sid: study_md for sid in new_table.ids()}
- new_table.add_metadata(samples_md, axis='sample')
-
- if rarefaction_depth is not None:
- new_table = new_table.subsample(rarefaction_depth)
- if len(new_table.ids()) == 0:
- raise RuntimeError(
- "All samples filtered out due to rarefacion level")
-
# write out the file
- fn = "%d_analysis_dt-%s_r-%s_c-%s.biom" % (
- self._id, data_type, reference_id, command_id)
+ data_type = sub('[^0-9a-zA-Z]+', '', data_type)
+ algorithm = sub('[^0-9a-zA-Z]+', '', algorithm)
+ files = sub('[^0-9a-zA-Z]+', '', files)
+ info = "%s_%s_%s" % (data_type, algorithm, files)
+ fn = "%d_analysis_%s.biom" % (self._id, info)
biom_fp = join(base_fp, fn)
with biom_open(biom_fp, 'w') as f:
new_table.to_hdf5(
- f, "Generated by Qiita. Analysis %d Datatype %s "
- "Reference %s Command %s" % (self._id, data_type,
- reference_id, command_id))
- self._add_file(fn, "biom", data_type=data_type)
+ f, "Generated by Qiita, analysis id: %d, info: %s" % (
+ self._id, label))
+ biom_files.append((data_type, biom_fp))
+ return biom_files
def _build_mapping_file(self, samples, rename_dup_samples=False):
"""Builds the combined mapping file for all samples
@@ -1067,256 +987,3 @@ def _add_file(self, filename, filetype, data_type=None):
VALUES (%s, %s{1})""".format(col, dtid)
qdb.sql_connection.TRN.add(sql, [self._id, fpid])
qdb.sql_connection.TRN.execute()
-
-
-class Collection(qdb.base.QiitaStatusObject):
- """
- Analysis overview object to track a multi-analysis collection.
-
- Attributes
- ----------
- name: str
- Name of the Collection
- description: str
- Description of what the collection is investigating
- owner: User object
- Owner of the Collection
- analyses: list of Analysis Objects
- all analyses that are part of the collection
- highlights : list of Job objects
- Important job results related to the collection
-
- Methods
- -------
- add_analysis
- remove_analysis
- highlight_job
- remove_highlight
- share
- unshare
- """
- _table = "collection"
- _analysis_table = "collection_analysis"
- _highlight_table = "collection_job"
- _share_table = "collection_users"
-
- def _status_setter_checks(self):
- r"""Perform a check to make sure not setting status away from public
- """
- if self.check_status(("public", )):
- raise qdb.exceptions.QiitaDBStatusError(
- "Illegal operation on public collection!")
-
- @classmethod
- def create(cls, owner, name, description=None):
- """Creates a new collection on the database
-
- Parameters
- ----------
- owner : User object
- Owner of the collection
- name : str
- Name of the collection
- description : str, optional
- Brief description of the collecton's overarching goal
- """
- with qdb.sql_connection.TRN:
- sql = """INSERT INTO qiita.{0} (email, name, description)
- VALUES (%s, %s, %s)
- RETURNING collection_id""".format(cls._table)
- qdb.sql_connection.TRN.add(sql, [owner.id, name, description])
- c_id = qdb.sql_connection.TRN.execute_fetchlast()
-
- return cls(c_id)
-
- @classmethod
- def delete(cls, id_):
- """Deletes a collection from the database
-
- Parameters
- ----------
- id_ : int
- ID of the collection to delete
-
- Raises
- ------
- QiitaDBStatusError
- Trying to delete a public collection
- """
- with qdb.sql_connection.TRN:
- if cls(id_).status == "public":
- raise qdb.exceptions.QiitaDBStatusError(
- "Can't delete public collection!")
-
- sql = "DELETE FROM qiita.{0} WHERE collection_id = %s"
- for table in (cls._analysis_table, cls._highlight_table,
- cls._share_table, cls._table):
- qdb.sql_connection.TRN.add(sql.format(table), [id_])
-
- qdb.sql_connection.TRN.execute()
-
- # --- Properties ---
- @property
- def name(self):
- with qdb.sql_connection.TRN:
- sql = "SELECT name FROM qiita.{0} WHERE collection_id = %s".format(
- self._table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- return qdb.sql_connection.TRN.execute_fetchlast()
-
- @name.setter
- def name(self, value):
- with qdb.sql_connection.TRN:
- self._status_setter_checks()
-
- sql = """UPDATE qiita.{0} SET name = %s
- WHERE collection_id = %s""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [value, self._id])
- qdb.sql_connection.TRN.execute()
-
- @property
- def description(self):
- with qdb.sql_connection.TRN:
- sql = """SELECT description FROM qiita.{0}
- WHERE collection_id = %s""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- return qdb.sql_connection.TRN.execute_fetchlast()
-
- @description.setter
- def description(self, value):
- with qdb.sql_connection.TRN:
- self._status_setter_checks()
-
- sql = """UPDATE qiita.{0} SET description = %s
- WHERE collection_id = %s""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [value, self._id])
- qdb.sql_connection.TRN.execute()
-
- @property
- def owner(self):
- with qdb.sql_connection.TRN:
- sql = """SELECT email FROM qiita.{0}
- WHERE collection_id = %s""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- return qdb.user.User(qdb.sql_connection.TRN.execute_fetchlast())
-
- @property
- def analyses(self):
- with qdb.sql_connection.TRN:
- sql = """SELECT analysis_id FROM qiita.{0}
- WHERE collection_id = %s""".format(self._analysis_table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- return [Analysis(aid)
- for aid in qdb.sql_connection.TRN.execute_fetchflatten()]
-
- @property
- def highlights(self):
- with qdb.sql_connection.TRN:
- sql = """SELECT job_id FROM qiita.{0}
- WHERE collection_id = %s""".format(self._highlight_table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- return [qdb.job.Job(jid)
- for jid in qdb.sql_connection.TRN.execute_fetchflatten()]
-
- @property
- def shared_with(self):
- with qdb.sql_connection.TRN:
- sql = """SELECT email FROM qiita.{0}
- WHERE collection_id = %s""".format(self._share_table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- return [qdb.user.User(uid)
- for uid in qdb.sql_connection.TRN.execute_fetchflatten()]
-
- # --- Functions ---
- def add_analysis(self, analysis):
- """Adds an analysis to the collection object
-
- Parameters
- ----------
- analysis : Analysis object
- """
- with qdb.sql_connection.TRN:
- self._status_setter_checks()
-
- sql = """INSERT INTO qiita.{0} (analysis_id, collection_id)
- VALUES (%s, %s)""".format(self._analysis_table)
- qdb.sql_connection.TRN.add(sql, [analysis.id, self._id])
- qdb.sql_connection.TRN.execute()
-
- def remove_analysis(self, analysis):
- """Remove an analysis from the collection object
-
- Parameters
- ----------
- analysis : Analysis object
- """
- with qdb.sql_connection.TRN:
- self._status_setter_checks()
-
- sql = """DELETE FROM qiita.{0}
- WHERE analysis_id = %s
- AND collection_id = %s""".format(self._analysis_table)
- qdb.sql_connection.TRN.add(sql, [analysis.id, self._id])
- qdb.sql_connection.TRN.execute()
-
- def highlight_job(self, job):
- """Marks a job as important to the collection
-
- Parameters
- ----------
- job : Job object
- """
- with qdb.sql_connection.TRN:
- self._status_setter_checks()
-
- sql = """INSERT INTO qiita.{0} (job_id, collection_id)
- VALUES (%s, %s)""".format(self._highlight_table)
- qdb.sql_connection.TRN.add(sql, [job.id, self._id])
- qdb.sql_connection.TRN.execute()
-
- def remove_highlight(self, job):
- """Removes job importance from the collection
-
- Parameters
- ----------
- job : Job object
- """
- with qdb.sql_connection.TRN:
- self._status_setter_checks()
-
- sql = """DELETE FROM qiita.{0}
- WHERE job_id = %s
- AND collection_id = %s""".format(self._highlight_table)
- qdb.sql_connection.TRN.add(sql, [job.id, self._id])
- qdb.sql_connection.TRN.execute()
-
- def share(self, user):
- """Shares the collection with another user
-
- Parameters
- ----------
- user : User object
- """
- with qdb.sql_connection.TRN:
- self._status_setter_checks()
-
- sql = """INSERT INTO qiita.{0} (email, collection_id)
- VALUES (%s, %s)""".format(self._share_table)
- qdb.sql_connection.TRN.add(sql, [user.id, self._id])
- qdb.sql_connection.TRN.execute()
-
- def unshare(self, user):
- """Unshares the collection with another user
-
- Parameters
- ----------
- user : User object
- """
- with qdb.sql_connection.TRN:
- self._status_setter_checks()
-
- sql = """DELETE FROM qiita.{0}
- WHERE email = %s
- AND collection_id = %s""".format(self._share_table)
- qdb.sql_connection.TRN.add(sql, [user.id, self._id])
- qdb.sql_connection.TRN.execute()
diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py
index ea16cccc1..1fca42f74 100644
--- a/qiita_db/artifact.py
+++ b/qiita_db/artifact.py
@@ -10,12 +10,17 @@
from future.utils import viewitems
from itertools import chain
from datetime import datetime
-from os import remove
+from os import remove, makedirs
+from os.path import isfile, exists, relpath
+from shutil import rmtree
+from functools import partial
import networkx as nx
import qiita_db as qdb
+from qiita_core.qiita_settings import qiita_config
+
class Artifact(qdb.base.QiitaObject):
r"""Any kind of file (or group of files) stored in the system and its
@@ -149,6 +154,25 @@ def create_type(name, description, can_be_submitted_to_ebi,
[at_id, qdb.util.convert_to_id(fpt, 'filepath_type'), req]
for fpt, req in filepath_types]
qdb.sql_connection.TRN.add(sql, sql_args, many=True)
+
+ # When creating a type, it is expected that a new mountpoint is
+ # created for that type. Note that we are going to check if there
+ # is an extra path prefix for the mountpoint, which is useful for
+ # the test environment
+ qc = qiita_config
+ mp = relpath(qc.working_dir, qc.base_data_dir).replace(
+ 'working_dir', '')
+ mp = mp + name if mp != '/' and mp != '' else name
+ sql = """INSERT INTO qiita.data_directory
+ (data_type, mountpoint, subdirectory, active)
+ VALUES (%s, %s, %s, %s)"""
+ qdb.sql_connection.TRN.add(sql, [name, mp, True, True])
+
+ # We are interested in the dirpath
+ dp = qdb.util.get_mountpoint(name)[0][1]
+ if not exists(dp):
+ makedirs(dp)
+
qdb.sql_connection.TRN.execute()
@classmethod
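The mountpoint inserted above is derived from working_dir relative to base_data_dir, which preserves the extra path prefix used in the test environment. The computation in isolation, with made-up config paths:

```python
# Isolated sketch of the mountpoint computation in create_type, using
# made-up config paths that mimic the test layout.
from os.path import relpath

base_data_dir = '/x/qiita_db/support_files/test_data/'
working_dir = '/x/qiita_db/support_files/test_data/working_dir/'
name = 'BIOM'

mp = relpath(working_dir, base_data_dir).replace('working_dir', '')
mp = mp + name if mp != '/' and mp != '' else name
print(mp)  # BIOM
```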
@@ -208,17 +232,22 @@ def copy(cls, artifact, prep_template):
@classmethod
def create(cls, filepaths, artifact_type, name=None, prep_template=None,
- parents=None, processing_parameters=None, move_files=True):
+ parents=None, processing_parameters=None, move_files=True,
+ analysis=None, data_type=None):
r"""Creates a new artifact in the system
The parameters depend on how the artifact was generated:
- If the artifact was uploaded by the user, the parameter
- `prep_template` should be provided and the parameters `parents` and
- `processing_parameters` should not be provided.
+ `prep_template` should be provided and the parameters `parents`,
+ `processing_parameters` and `analysis` should not be provided.
- If the artifact was generated by processing one or more
artifacts, the parameters `parents` and `processing_parameters`
- should be provided and the parameter `prep_template` should not
- be provided.
+ should be provided and the parameters `prep_template` and
+ `analysis` should not be provided.
+ - If the artifact is the initial artifact of the analysis, the
+ parameters `analysis` and `data_type` should be provided and the
+ parameters `prep_template`, `parents` and `processing_parameters`
+ should not be provided.
Parameters
----------
@@ -232,16 +261,25 @@ def create(cls, filepaths, artifact_type, name=None, prep_template=None,
prep_template : qiita_db.metadata_template.PrepTemplate, optional
If the artifact is being uploaded by the user, the prep template
to which the artifact should be linked to. If not provided,
- `parents` should be provided.
+ `parents` or `analysis` should be provided.
parents : iterable of qiita_db.artifact.Artifact, optional
The list of artifacts from which the new artifact has been
- generated. If not provided, `prep_template` should be provided.
+ generated. If not provided, `prep_template` or `analysis`
+ should be provided.
processing_parameters : qiita_db.software.Parameters, optional
The processing parameters used to generate the new artifact
from `parents`. It is required if `parents` is provided. It should
- not be provided if `prep_template` is provided.
+ not be provided if `parents` is not provided.
move_files : bool, optional
If False the files will not be moved but copied
+ analysis : qiita_db.analysis.Analysis, optional
+ If the artifact is the initial artifact of an analysis, the analysis
+ to which the artifact belongs to. If not provided, `prep_template`
+ or `parents` should be provided.
+ data_type : str, optional
+ The data_type of the artifact in the `analysis`. It is required if
+ `analysis` is provided. It should not be provided if `analysis` is
+ not provided.
Returns
-------
@@ -271,71 +309,131 @@ def create(cls, filepaths, artifact_type, name=None, prep_template=None,
raise qdb.exceptions.QiitaDBArtifactCreationError(
"at least one filepath is required.")
- # Parents or prep template must be provided, but not both
- if parents and prep_template:
+ # Check that the combination of parameters is correct
+ counts = (int(bool(parents or processing_parameters)) +
+ int(prep_template is not None) +
+ int(bool(analysis or data_type)))
+ if counts != 1:
+ # Either none or more than one creation mode has been provided
raise qdb.exceptions.QiitaDBArtifactCreationError(
- "parents or prep_template should be provided but not both")
- elif not (parents or prep_template):
+ "One and only one of parents, prep template or analysis must "
+ "be provided")
+ elif bool(parents) != bool(processing_parameters):
+ # When provided, parents and processing parameters both should be
+ # provided (this is effectively doing an XOR)
raise qdb.exceptions.QiitaDBArtifactCreationError(
- "at least parents or prep_template must be provided")
- elif parents and not processing_parameters:
- # If parents is provided, processing parameters should also be
- # provided
+ "When provided, both parents and processing parameters should "
+ "be provided")
+ elif bool(analysis) != bool(data_type):
+ # When provided, analysis and data_type both should be
+ # provided (this is effectively doing an XOR)
raise qdb.exceptions.QiitaDBArtifactCreationError(
- "if parents is provided, processing_parameters should also be"
- "provided.")
- elif prep_template and processing_parameters:
- # If prep_template is provided, processing_parameters should not be
- # provided
- raise qdb.exceptions.QiitaDBArtifactCreationError(
- "if prep_template is provided, processing_parameters should "
- "not be provided.")
+ "When provided, both analysis and data_type should "
+ "be provided")
+
+ # There are three different ways of creating an Artifact, but all of
+ # them execute a set of common operations. Declare functions to avoid
+ # code duplication. These functions should not be used outside of the
+ # create function, hence declaring them here
+ def _common_creation_steps(atype, cmd_id, data_type, cmd_parameters):
+ gen_timestamp = datetime.now()
+ visibility_id = qdb.util.convert_to_id("sandbox", "visibility")
+ atype_id = qdb.util.convert_to_id(atype, "artifact_type")
+ dtype_id = qdb.util.convert_to_id(data_type, "data_type")
+ # Create the artifact row in the artifact table
+ sql = """INSERT INTO qiita.artifact
+ (generated_timestamp, command_id, data_type_id,
+ command_parameters, visibility_id,
+ artifact_type_id, submitted_to_vamps)
+ VALUES (%s, %s, %s, %s, %s, %s, %s)
+ RETURNING artifact_id"""
+ sql_args = [gen_timestamp, cmd_id, dtype_id,
+ cmd_parameters, visibility_id, atype_id, False]
+ qdb.sql_connection.TRN.add(sql, sql_args)
+ a_id = qdb.sql_connection.TRN.execute_fetchlast()
+ qdb.sql_connection.TRN.execute()
- timestamp = datetime.now()
+ return cls(a_id)
- with qdb.sql_connection.TRN:
- visibility_id = qdb.util.convert_to_id("sandbox", "visibility")
- artifact_type_id = qdb.util.convert_to_id(
- artifact_type, "artifact_type")
+ def _associate_with_study(instance, study_id):
+ # Associate the artifact with the study
+ sql = """INSERT INTO qiita.study_artifact
+ (study_id, artifact_id)
+ VALUES (%s, %s)"""
+ sql_args = [study_id, instance.id]
+ qdb.sql_connection.TRN.add(sql, sql_args)
+ qdb.sql_connection.TRN.execute()
- if parents:
- # Check that all parents belong to the same study
- studies = {p.study.id for p in parents}
- if len(studies) > 1:
- raise qdb.exceptions.QiitaDBArtifactCreationError(
- "parents from multiple studies provided: %s"
- % ', '.join(studies))
- study_id = studies.pop()
+ def _associate_with_analysis(instance, analysis_id):
+ # Associate the artifact with the analysis
+ sql = """INSERT INTO qiita.analysis_artifact
+ (analysis_id, artifact_id)
+ VALUES (%s, %s)"""
+ sql_args = [analysis_id, instance.id]
+ qdb.sql_connection.TRN.add(sql, sql_args)
+ qdb.sql_connection.TRN.execute()
- # Check that all parents have the same data type
+ with qdb.sql_connection.TRN:
+ if parents:
dtypes = {p.data_type for p in parents}
- if len(dtypes) > 1:
+ # If an artifact has parents, it can be either from the
+ # processing pipeline or the analysis pipeline. Decide which
+ # one here
+ studies = {p.study for p in parents}
+ analyses = {p.analysis for p in parents}
+ studies.discard(None)
+ analyses.discard(None)
+ studies = {s.id for s in studies}
+ analyses = {a.id for a in analyses}
+
+ # The first 2 cases should never happen, but it doesn't hurt
+ # to check them
+ len_studies = len(studies)
+ len_analyses = len(analyses)
+ if len_studies > 0 and len_analyses > 0:
+ raise qdb.exceptions.QiitaDBArtifactCreationError(
+ "All the parents from an artifact should be either "
+ "from the analysis pipeline or all from the processing"
+ " pipeline")
+ elif len_studies > 1 or len_analyses > 1:
raise qdb.exceptions.QiitaDBArtifactCreationError(
- "parents have multiple data types: %s"
- % ", ".join(dtypes))
- dtype_id = qdb.util.convert_to_id(dtypes.pop(), "data_type")
-
- # Create the artifact
- sql = """INSERT INTO qiita.artifact
- (generated_timestamp, command_id, data_type_id,
- command_parameters, visibility_id,
- artifact_type_id, submitted_to_vamps)
- VALUES (%s, %s, %s, %s, %s, %s, %s)
- RETURNING artifact_id"""
- sql_args = [timestamp, processing_parameters.command.id,
- dtype_id, processing_parameters.dump(),
- visibility_id, artifact_type_id, False]
- qdb.sql_connection.TRN.add(sql, sql_args)
- a_id = qdb.sql_connection.TRN.execute_fetchlast()
+ "Parents from multiple studies/analyses provided. "
+ "Analyses: %s. Studies: %s."
+ % (', '.join(map(str, analyses)), ', '.join(map(str, studies))))
+ elif len_studies == 1:
+ # This artifact is part of the processing pipeline
+ study_id = studies.pop()
+ # In the processing pipeline, artifacts can have only
+ # one dtype
+ if len(dtypes) > 1:
+ raise qdb.exceptions.QiitaDBArtifactCreationError(
+ "parents have multiple data types: %s"
+ % ", ".join(dtypes))
+
+ instance = _common_creation_steps(
+ artifact_type, processing_parameters.command.id,
+ dtypes.pop(), processing_parameters.dump())
+
+ _associate_with_study(instance, study_id)
+ else:
+ # This artifact is part of the analysis pipeline
+ analysis_id = analyses.pop()
+ # In the analysis pipeline, artifact parents can have
+ # more than one data type
+ data_type = ("Multiomic"
+ if len(dtypes) > 1 else dtypes.pop())
+ instance = _common_creation_steps(
+ artifact_type, processing_parameters.command.id,
+ data_type, processing_parameters.dump())
+ _associate_with_analysis(instance, analysis_id)
# Associate the artifact with its parents
sql = """INSERT INTO qiita.parent_artifact
(artifact_id, parent_id)
VALUES (%s, %s)"""
- sql_args = [(a_id, p.id) for p in parents]
+ sql_args = [(instance.id, p.id) for p in parents]
qdb.sql_connection.TRN.add(sql, sql_args, many=True)
- instance = cls(a_id)
# inheriting visibility
visibilities = {a.visibility for a in instance.parents}
# set based on the "lowest" visibility
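The parameter validation above enforces exactly one creation mode (parents + processing_parameters, prep_template, or analysis + data_type) with pairwise XOR checks. The same logic isolated as a sketch, with ValueError standing in for QiitaDBArtifactCreationError:

```python
# Isolated sketch of Artifact.create's creation-mode validation, with
# ValueError standing in for QiitaDBArtifactCreationError.
def check_creation_mode(parents, processing_parameters, prep_template,
                        analysis, data_type):
    counts = (int(bool(parents or processing_parameters)) +
              int(prep_template is not None) +
              int(bool(analysis or data_type)))
    if counts != 1:
        raise ValueError("One and only one of parents, prep template or "
                         "analysis must be provided")
    # When one of a pair is given, the other must be given too (XOR)
    if bool(parents) != bool(processing_parameters):
        raise ValueError("When provided, both parents and processing "
                         "parameters should be provided")
    if bool(analysis) != bool(data_type):
        raise ValueError("When provided, both analysis and data_type "
                         "should be provided")

check_creation_mode(None, None, None, 'analysis', '16S')  # passes
```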
@@ -345,42 +443,32 @@ def create(cls, filepaths, artifact_type, name=None, prep_template=None,
instance.visibility = 'private'
else:
instance.visibility = 'public'
- else:
- dtype_id = qdb.util.convert_to_id(prep_template.data_type(),
- "data_type")
- # Create the artifact
- sql = """INSERT INTO qiita.artifact
- (generated_timestamp, visibility_id,
- artifact_type_id, data_type_id,
- submitted_to_vamps)
- VALUES (%s, %s, %s, %s, %s)
- RETURNING artifact_id"""
- sql_args = [timestamp, visibility_id, artifact_type_id,
- dtype_id, False]
- qdb.sql_connection.TRN.add(sql, sql_args)
- a_id = qdb.sql_connection.TRN.execute_fetchlast()
+ elif prep_template:
+ # This artifact is uploaded by the user in the
+ # processing pipeline
+ instance = _common_creation_steps(
+ artifact_type, None, prep_template.data_type(), None)
# Associate the artifact with the prep template
- instance = cls(a_id)
prep_template.artifact = instance
- study_id = prep_template.study_id
-
- # Associate the artifact with the study
- sql = """INSERT INTO qiita.study_artifact (study_id, artifact_id)
- VALUES (%s, %s)"""
- sql_args = [study_id, a_id]
- qdb.sql_connection.TRN.add(sql, sql_args)
+ # Associate the artifact with the study
+ _associate_with_study(instance, prep_template.study_id)
+ else:
+ # This artifact is an initial artifact of an analysis
+ instance = _common_creation_steps(
+ artifact_type, None, data_type, None)
+ # Associate the artifact with the analysis
+ analysis.add_artifact(instance)
# Associate the artifact with its filepaths
fp_ids = qdb.util.insert_filepaths(
- filepaths, a_id, artifact_type, "filepath",
+ filepaths, instance.id, artifact_type, "filepath",
move_files=move_files, copy=(not move_files))
sql = """INSERT INTO qiita.artifact_filepath
(artifact_id, filepath_id)
VALUES (%s, %s)"""
- sql_args = [[a_id, fp_id] for fp_id in fp_ids]
+ sql_args = [[instance.id, fp_id] for fp_id in fp_ids]
qdb.sql_connection.TRN.add(sql, sql_args, many=True)
- qdb.sql_connection.TRN.execute()
if name:
instance.name = name
@@ -505,6 +593,10 @@ def delete(cls, artifact_id):
sql = "DELETE FROM qiita.study_artifact WHERE artifact_id = %s"
qdb.sql_connection.TRN.add(sql, [artifact_id])
+ # Detach the artifact from the analysis_artifact table
+ sql = "DELETE FROM qiita.analysis_artifact WHERE artifact_id = %s"
+ qdb.sql_connection.TRN.add(sql, [artifact_id])
+
# Delete the row in the artifact table
sql = "DELETE FROM qiita.artifact WHERE artifact_id = %s"
qdb.sql_connection.TRN.add(sql, [artifact_id])
@@ -873,41 +965,55 @@ def html_summary_fp(self):
return res
- @html_summary_fp.setter
- def html_summary_fp(self, value):
+ def set_html_summary(self, html_fp, support_dir=None):
"""Sets the HTML summary of the artifact
Parameters
----------
- value : str
+ html_fp : str
Path to the new HTML summary
+ support_dir : str, optional
+ Path to the directory containing any support files needed by
+ the HTML file
"""
with qdb.sql_connection.TRN:
- current = self.html_summary_fp
- if current:
+ if self.html_summary_fp:
# Delete the current HTML summary
- fp_id = current[0]
- fp = current[1]
+ to_delete_ids = []
+ to_delete_fps = []
+ for fp_id, fp, fp_type in self.filepaths:
+ if fp_type in ('html_summary', 'html_summary_dir'):
+ to_delete_ids.append([fp_id])
+ to_delete_fps.append(fp)
# From the artifact_filepath table
sql = """DELETE FROM qiita.artifact_filepath
WHERE filepath_id = %s"""
- qdb.sql_connection.TRN.add(sql, [fp_id])
+ qdb.sql_connection.TRN.add(sql, to_delete_ids, many=True)
# From the filepath table
sql = "DELETE FROM qiita.filepath WHERE filepath_id=%s"
- qdb.sql_connection.TRN.add(sql, [fp_id])
+ qdb.sql_connection.TRN.add(sql, to_delete_ids, many=True)
# And from the filesystem only after the transaction is
# successfully completed (after commit)
- qdb.sql_connection.TRN.add_post_commit_func(remove, fp)
+
+ def path_cleaner(fp):
+ if isfile(fp):
+ remove(fp)
+ else:
+ rmtree(fp)
+ qdb.sql_connection.TRN.add_post_commit_func(
+ partial(map, path_cleaner, to_delete_fps))
# Add the new HTML summary
+ filepaths = [(html_fp, 'html_summary')]
+ if support_dir is not None:
+ filepaths.append((support_dir, 'html_summary_dir'))
fp_ids = qdb.util.insert_filepaths(
- [(value, 'html_summary')], self.id, self.artifact_type,
- "filepath")
+ filepaths, self.id, self.artifact_type, "filepath")
sql = """INSERT INTO qiita.artifact_filepath
(artifact_id, filepath_id)
VALUES (%s, %s)"""
- # We only inserted a single filepath, so using index 0
- qdb.sql_connection.TRN.add(sql, [self.id, fp_ids[0]])
+ sql_args = [[self.id, id_] for id_ in fp_ids]
+ qdb.sql_connection.TRN.add(sql, sql_args, many=True)
qdb.sql_connection.TRN.execute()
@property
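set_html_summary replaces the old html_summary_fp setter so a directory of support files can accompany the HTML page; the previous summary (and its support dir) is removed from disk only after the transaction commits. A usage sketch with made-up paths:

```python
# Usage sketch for set_html_summary; the paths are made up.
import qiita_db as qdb

artifact = qdb.artifact.Artifact(1)
artifact.set_html_summary('/tmp/summary.html',
                          support_dir='/tmp/summary_files')
```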
@@ -1085,15 +1191,33 @@ def study(self):
Returns
-------
- qiita_db.study.Study
- The study that owns the artifact
+ qiita_db.study.Study or None
+ The study that owns the artifact, if any
"""
with qdb.sql_connection.TRN:
sql = """SELECT study_id
FROM qiita.study_artifact
WHERE artifact_id = %s"""
qdb.sql_connection.TRN.add(sql, [self.id])
- return qdb.study.Study(qdb.sql_connection.TRN.execute_fetchlast())
+ res = qdb.sql_connection.TRN.execute_fetchindex()
+ return qdb.study.Study(res[0][0]) if res else None
+
+ @property
+ def analysis(self):
+ """The analysis to which the artifact belongs to
+
+ Returns
+ -------
+ qiita_db.analysis.Analysis or None
+ The analysis that owns the artifact, if any
+ """
+ with qdb.sql_connection.TRN:
+ sql = """SELECT analysis_id
+ FROM qiita.analysis_artifact
+ WHERE artifact_id = %s"""
+ qdb.sql_connection.TRN.add(sql, [self.id])
+ res = qdb.sql_connection.TRN.execute_fetchindex()
+ return qdb.analysis.Analysis(res[0][0]) if res else None
def jobs(self, cmd=None, status=None):
"""Jobs that used this artifact as input
diff --git a/qiita_db/base.py b/qiita_db/base.py
index 084333616..b2fadaccc 100644
--- a/qiita_db/base.py
+++ b/qiita_db/base.py
@@ -14,7 +14,6 @@
:toctree: generated/
QiitaObject
- QiitaStatusObject
"""
# -----------------------------------------------------------------------------
@@ -220,106 +219,3 @@ def __hash__(self):
def id(self):
r"""The object id on the storage system"""
return self._id
-
-
-class QiitaStatusObject(QiitaObject):
- r"""Base class for any qiita_db object with a status property
-
- Attributes
- ----------
- status
-
- Methods
- -------
- check_status
- _status_setter_checks
- """
-
- @property
- def status(self):
- r"""String with the current status of the analysis"""
- # Get the DB status of the object
- with qdb.sql_connection.TRN:
- sql = """SELECT status FROM qiita.{0}_status
- WHERE {0}_status_id = (
- SELECT {0}_status_id FROM qiita.{0}
- WHERE {0}_id = %s)""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- return qdb.sql_connection.TRN.execute_fetchlast()
-
- def _status_setter_checks(self):
- r"""Perform any extra checks that needed to be done before setting the
- object status on the database. Should be overwritten by the subclasses
- """
- raise qdb.exceptions.QiitaDBNotImplementedError()
-
- @status.setter
- def status(self, status):
- r"""Change the status of the analysis
-
- Parameters
- ----------
- status: str
- The new object status
- """
- with qdb.sql_connection.TRN:
- # Perform any extra checks needed before
- # we update the status in the DB
- self._status_setter_checks()
-
- # Update the status of the object
- sql = """UPDATE qiita.{0} SET {0}_status_id = (
- SELECT {0}_status_id FROM qiita.{0}_status
- WHERE status = %s)
- WHERE {0}_id = %s""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [status, self._id])
- qdb.sql_connection.TRN.execute()
-
- def check_status(self, status, exclude=False):
- r"""Checks status of object.
-
- Parameters
- ----------
- status: iterable
- Iterable of statuses to check against.
- exclude: bool, optional
- If True, will check that database status is NOT one of the statuses
- passed. Default False.
-
- Returns
- -------
- bool
- True if the object status is in the desired set of statuses. False
- otherwise.
-
- Notes
- -----
- This assumes the following database setup is in place: For a given
- cls._table setting, such as "table", there is a corresponding table
- with the name "table_status" holding the status entries allowed. This
- table has a column called "status" that holds the values corresponding
- to what is passed as status in this function and a column
- "table_status_id" corresponding to the column of the same name in
- "table".
-
- Table setup:
- foo: foo_status_id ----> foo_status: foo_status_id, status
- """
- with qdb.sql_connection.TRN:
- # Get all available statuses
- sql = "SELECT DISTINCT status FROM qiita.{0}_status".format(
- self._table)
- qdb.sql_connection.TRN.add(sql)
- # We need to access to the results of the last SQL query,
- # hence indexing using -1
- avail_status = [
- x[0] for x in qdb.sql_connection.TRN.execute_fetchindex()]
-
- # Check that all the provided status are valid status
- if set(status).difference(avail_status):
- raise ValueError("%s are not valid status values"
- % set(status).difference(avail_status))
-
- # Get the DB status of the object
- dbstatus = self.status
- return dbstatus not in status if exclude else dbstatus in status
diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py
index 7a3aa3499..b6981f045 100644
--- a/qiita_db/environment_manager.py
+++ b/qiita_db/environment_manager.py
@@ -405,4 +405,4 @@ def patch(patches_dir=PATCHES_DIR, verbose=False, test=False):
if verbose:
print('\t\tApplying python patch %s...'
% py_patch_filename)
- execfile(py_patch_fp)
+ execfile(py_patch_fp, {})
diff --git a/qiita_db/handlers/analysis.py b/qiita_db/handlers/analysis.py
new file mode 100644
index 000000000..4a311a512
--- /dev/null
+++ b/qiita_db/handlers/analysis.py
@@ -0,0 +1,69 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2014--, The Qiita Development Team.
+#
+# Distributed under the terms of the BSD 3-clause License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from tornado.web import HTTPError
+
+import qiita_db as qdb
+from .oauth2 import OauthBaseHandler, authenticate_oauth
+
+
+def _get_analysis(a_id):
+ """Returns the analysis with the given `a_id` if it exists
+
+ Parameters
+ ----------
+ a_id : str
+ The analysis id
+
+ Returns
+ -------
+ qiita_db.analysis.Analysis
+ The requested analysis
+
+ Raises
+ ------
+ HTTPError
+ If the analysis does not exist, with error code 404
+ If there is a problem instantiating the analysis, with error code 500
+ """
+ try:
+ a_id = int(a_id)
+ a = qdb.analysis.Analysis(a_id)
+ except qdb.exceptions.QiitaDBUnknownIDError:
+ raise HTTPError(404)
+ except Exception as e:
+ raise HTTPError(500, 'Error instantiating analysis %s: %s'
+ % (a_id, str(e)))
+ return a
+
+
+class APIAnalysisMetadataHandler(OauthBaseHandler):
+ @authenticate_oauth
+ def get(self, analysis_id):
+ """Retrieves the analysis metadata
+
+ Parameters
+ ----------
+ analysis_id : str
+ The id of the analysis whose information is being retrieved
+
+ Returns
+ -------
+ dict
+ The contents of the analysis keyed by sample id
+ """
+ with qdb.sql_connection.TRN:
+ a = _get_analysis(analysis_id)
+ mf_fp = a.mapping_file
+ response = None
+ if mf_fp is not None:
+ df = qdb.metadata_template.util.load_template_to_dataframe(
+ mf_fp, index='#SampleID')
+ response = df.to_dict(orient='index')
+
+ self.write(response)
diff --git a/qiita_db/handlers/artifact.py b/qiita_db/handlers/artifact.py
index 82732863f..cd99b8da4 100644
--- a/qiita_db/handlers/artifact.py
+++ b/qiita_db/handlers/artifact.py
@@ -79,6 +79,8 @@ def get(self, artifact_id):
"""
with qdb.sql_connection.TRN:
artifact = _get_artifact(artifact_id)
+ study = artifact.study
+ analysis = artifact.analysis
response = {
'name': artifact.name,
'timestamp': str(artifact.timestamp),
@@ -89,7 +91,8 @@ def get(self, artifact_id):
'can_be_submitted_to_vamps':
artifact.can_be_submitted_to_vamps,
'prep_information': [p.id for p in artifact.prep_templates],
- 'study': artifact.study.id}
+ 'study': study.id if study else None,
+ 'analysis': analysis.id if analysis else None}
params = artifact.processing_parameters
response['processing_parameters'] = (
params.values if params is not None else None)
@@ -128,8 +131,17 @@ def patch(self, artifact_id):
raise HTTPError(400, 'Incorrect path parameter value')
else:
artifact = _get_artifact(artifact_id)
+
+ try:
+ html_data = loads(req_value)
+ html_fp = html_data['html']
+ html_dir = html_data['dir']
+ except ValueError:
+ html_fp = req_value
+ html_dir = None
+
try:
- artifact.html_summary_fp = req_value
+ artifact.set_html_summary(html_fp, html_dir)
except Exception as e:
raise HTTPError(500, str(e))
else:
@@ -166,15 +178,21 @@ def post(self):
"""
filepaths = loads(self.get_argument('filepaths'))
artifact_type = self.get_argument('type')
- prep_template = self.get_argument('prep')
+ prep_template = self.get_argument('prep', None)
+ analysis = self.get_argument('analysis', None)
name = self.get_argument('name', None)
+ dtype = self.get_argument('data_type', None)
- if prep_template:
+ if prep_template is not None:
prep_template = qdb.metadata_template.prep_template.PrepTemplate(
prep_template)
+ dtype = None
+ if analysis is not None:
+ analysis = qdb.analysis.Analysis(analysis)
a = qdb.artifact.Artifact.create(
- filepaths, artifact_type, name=name, prep_template=prep_template)
+ filepaths, artifact_type, name=name, prep_template=prep_template,
+ analysis=analysis, data_type=dtype)
self.write({'artifact': a.id})
diff --git a/qiita_db/handlers/plugin.py b/qiita_db/handlers/plugin.py
index 5850df51c..df4e1e2ff 100644
--- a/qiita_db/handlers/plugin.py
+++ b/qiita_db/handlers/plugin.py
@@ -100,16 +100,23 @@ def post(self, name, version):
cmd_desc = self.get_argument('description')
req_params = loads(self.get_argument('required_parameters'))
opt_params = loads(self.get_argument('optional_parameters'))
+
+ for p_name, (p_type, dflt) in opt_params.items():
+ if p_type.startswith('mchoice'):
+ opt_params[p_name] = [p_type, loads(dflt)]
+
outputs = self.get_argument('outputs', None)
if outputs:
outputs = loads(outputs)
dflt_param_set = loads(self.get_argument('default_parameter_sets'))
+ analysis_only = self.get_argument('analysis_only', False)
parameters = req_params
parameters.update(opt_params)
cmd = qdb.software.Command.create(
- plugin, cmd_name, cmd_desc, parameters, outputs)
+ plugin, cmd_name, cmd_desc, parameters, outputs,
+ analysis_only=analysis_only)
if dflt_param_set is not None:
for name, vals in dflt_param_set.items():
@@ -221,4 +228,6 @@ def post(self):
for fp in conf_files:
s = qdb.software.Software.from_file(fp, update=True)
s.activate()
+ s.register_commands()
+
self.finish()
diff --git a/qiita_db/handlers/processing_job.py b/qiita_db/handlers/processing_job.py
index efd5a94bd..84efc21af 100644
--- a/qiita_db/handlers/processing_job.py
+++ b/qiita_db/handlers/processing_job.py
@@ -11,7 +11,6 @@
from tornado.web import HTTPError
-from qiita_core.qiita_settings import qiita_config
import qiita_db as qdb
from .oauth2 import OauthBaseHandler, authenticate_oauth
@@ -59,13 +58,10 @@ def _job_completer(job_id, payload):
completing the job
"""
import qiita_db as qdb
- cmd = "%s '%s' %s %s '%s'" % (qiita_config.private_launcher,
- qiita_config.qiita_env, 'complete_job',
- job_id, payload)
- std_out, std_err, return_value = qdb.processing_job._system_call(cmd)
- if return_value != 0:
- error = ("Can't submit private task 'complete job:\n"
- "Std output:%s\nStd error:%s'" % (std_out, std_err))
+
+ success, error = qdb.processing_job.private_job_submitter(
+ "Complete job %s" % job_id, 'complete_job', [job_id, payload])
+ if not success:
job = qdb.processing_job.ProcessingJob(job_id)
job.complete(False, error=error)
diff --git a/qiita_db/handlers/tests/test_analysis.py b/qiita_db/handlers/tests/test_analysis.py
new file mode 100644
index 000000000..2c20c64a3
--- /dev/null
+++ b/qiita_db/handlers/tests/test_analysis.py
@@ -0,0 +1,106 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2014--, The Qiita Development Team.
+#
+# Distributed under the terms of the BSD 3-clause License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from unittest import main, TestCase
+from json import loads
+
+from tornado.web import HTTPError
+
+from qiita_db.handlers.tests.oauthbase import OauthTestingBase
+from qiita_db.handlers.analysis import _get_analysis
+import qiita_db as qdb
+
+
+class UtilTests(TestCase):
+ def test_get_analysis(self):
+ obs = _get_analysis(1)
+ exp = qdb.analysis.Analysis(1)
+ self.assertEqual(obs, exp)
+
+ # It doesn't exist
+ with self.assertRaises(HTTPError):
+ _get_analysis(100)
+
+
+class APIAnalysisMetadataHandlerTests(OauthTestingBase):
+ def test_get_does_not_exist(self):
+ obs = self.get('/qiita_db/analysis/100/metadata/', headers=self.header)
+ self.assertEqual(obs.code, 404)
+
+ def test_get_no_header(self):
+ obs = self.get('/qiita_db/analysis/1/metadata/')
+ self.assertEqual(obs.code, 400)
+
+ def test_get(self):
+ obs = self.get('/qiita_db/analysis/1/metadata/', headers=self.header)
+ self.assertEqual(obs.code, 200)
+
+ obs = loads(obs.body)
+ exp = ['1.SKM4.640180', '1.SKB8.640193', '1.SKD8.640184',
+ '1.SKM9.640192', '1.SKB7.640196']
+ self.assertItemsEqual(obs, exp)
+
+ exp = {'platform': 'Illumina', 'longitude': '95.5088566087',
+ 'experiment_center': 'ANL', 'center_name': 'ANL',
+ 'run_center': 'ANL', 'run_prefix': 's_G1_L001_sequences',
+ 'sample_type': 'ENVO:soil',
+ 'common_name': 'rhizosphere metagenome', 'samp_size': '.25,g',
+ 'has_extracted_data': 'True', 'water_content_soil': '0.101',
+ 'target_gene': '16S rRNA',
+ 'env_feature': 'ENVO:plant-associated habitat',
+ 'sequencing_meth': 'Sequencing by synthesis',
+ 'Description': 'Cannabis Soil Microbiome', 'run_date': '8/1/12',
+ 'qiita_owner': 'Dude', 'altitude': '0.0',
+ 'BarcodeSequence': 'TCGACCAAACAC',
+ 'env_biome': 'ENVO:Temperate grasslands, savannas, and '
+ 'shrubland biome',
+ 'texture': '63.1 sand, 17.7 silt, 19.2 clay',
+ 'pcr_primers': 'FWD:GTGCCAGCMGCCGCGGTAA; '
+ 'REV:GGACTACHVGGGTWTCTAAT',
+ 'experiment_title': 'Cannabis Soil Microbiome',
+ 'library_construction_protocol':
+ 'This analysis was done as in Caporaso et al 2011 Genome '
+ 'research. The PCR primers (F515/R806) were developed '
+ 'against the V4 region of the 16S rRNA (both bacteria and '
+ 'archaea), which we determined would yield optimal '
+ 'community clustering with reads of this length using a '
+ 'procedure similar to that of ref. 15. [For reference, '
+ 'this primer pair amplifies the region 533_786 in the '
+ 'Escherichia coli strain 83972 sequence (greengenes '
+ 'accession no. prokMSA_id:470367).] The reverse PCR primer '
+ 'is barcoded with a 12-base error-correcting Golay code to '
+ 'facilitate multiplexing of up to 1,500 samples per lane, '
+ 'and both PCR primers contain sequencer adapter regions.',
+ 'experiment_design_description':
+ 'micro biome of soil and rhizosphere of cannabis plants '
+ 'from CA',
+ 'study_center': 'CCME', 'physical_location': 'ANL',
+ 'qiita_prep_id': '1', 'taxon_id': '939928',
+ 'has_physical_specimen': 'True', 'ph': '6.82',
+ 'description_duplicate': 'Bucu Rhizo',
+ 'qiita_study_alias': 'Cannabis Soils', 'sample_center': 'ANL',
+ 'elevation': '114.0', 'illumina_technology': 'MiSeq',
+ 'assigned_from_geo': 'n',
+ 'collection_timestamp': '2011-11-11 13:00:00',
+ 'latitude': '31.7167821863',
+ 'LinkerPrimerSequence': 'GTGCCAGCMGCCGCGGTAA',
+ 'qiita_principal_investigator': 'PIDude', 'host_taxid': '3483',
+ 'samp_salinity': '7.44', 'host_subject_id': '1001:D2',
+ 'target_subfragment': 'V4', 'season_environment': 'winter',
+ 'temp': '15.0', 'emp_status': 'EMP',
+ 'country': 'GAZ:United States of America',
+ 'instrument_model': 'Illumina MiSeq',
+ 'qiita_study_title': 'Identification of the Microbiomes for '
+ 'Cannabis Soils',
+ 'tot_nitro': '1.3', 'depth': '0.15',
+ 'anonymized_name': 'SKM4', 'tot_org_carb': '3.31'}
+ self.assertEqual(obs['1.SKM4.640180'], exp)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/qiita_db/handlers/tests/test_artifact.py b/qiita_db/handlers/tests/test_artifact.py
index dd64464c9..df3efc132 100644
--- a/qiita_db/handlers/tests/test_artifact.py
+++ b/qiita_db/handlers/tests/test_artifact.py
@@ -9,13 +9,16 @@
from unittest import main, TestCase
from json import loads
from functools import partial
-from os.path import join, exists
+from os.path import join, exists, isfile
from os import close, remove
-from tempfile import mkstemp
+from shutil import rmtree
+from tempfile import mkstemp, mkdtemp
from json import dumps
from tornado.web import HTTPError
import pandas as pd
+from biom import example_table as et
+from biom.util import biom_open
from qiita_db.handlers.tests.oauthbase import OauthTestingBase
import qiita_db as qdb
@@ -37,15 +40,16 @@ class ArtifactHandlerTests(OauthTestingBase):
def setUp(self):
super(ArtifactHandlerTests, self).setUp()
- fd, self.html_fp = mkstemp(suffix=".html")
- close(fd)
- self._clean_up_files = [self.html_fp]
+ self._clean_up_files = []
def tearDown(self):
super(ArtifactHandlerTests, self).tearDown()
for fp in self._clean_up_files:
if exists(fp):
- remove(fp)
+ if isfile(fp):
+ remove(fp)
+ else:
+ rmtree(fp)
def test_get_artifact_does_not_exist(self):
obs = self.get('/qiita_db/artifacts/100/', headers=self.header)
@@ -77,23 +81,71 @@ def test_get_artifact(self):
'is_submitted_to_vamps': None,
'prep_information': [1],
'study': 1,
+ 'analysis': None,
'processing_parameters': None,
'files': exp_fps}
self.assertEqual(loads(obs.body), exp)
+ obs = self.get('/qiita_db/artifacts/9/', headers=self.header)
+ self.assertEqual(obs.code, 200)
+ db_test_raw_dir = qdb.util.get_mountpoint('analysis')[0][1]
+ path_builder = partial(join, db_test_raw_dir)
+ exp_fps = {"biom": [path_builder('1_analysis_18S.biom')]}
+ exp = {
+ 'name': 'noname',
+ 'visibility': 'sandbox',
+ 'type': 'BIOM',
+ 'data_type': '18S',
+ 'can_be_submitted_to_ebi': False,
+ 'ebi_run_accessions': None,
+ 'can_be_submitted_to_vamps': False,
+ 'is_submitted_to_vamps': None,
+ 'prep_information': [],
+ 'study': None,
+ 'analysis': 1,
+ 'processing_parameters': {'biom_table': 8, 'depth': 9000,
+ 'subsample_multinomial': False},
+ 'files': exp_fps}
+ obs = loads(obs.body)
+ # The timestamp is generated at patch time, so we can't check for it
+ del obs['timestamp']
+ self.assertEqual(obs, exp)
+
def test_patch(self):
+ fd, html_fp = mkstemp(suffix=".html")
+ close(fd)
+ self._clean_up_files.append(html_fp)
+ # Correct argument with a single HTML file
arguments = {'op': 'add', 'path': '/html_summary/',
- 'value': self.html_fp}
- self.assertIsNone(qdb.artifact.Artifact(1).html_summary_fp)
+ 'value': html_fp}
+ artifact = qdb.artifact.Artifact(1)
+ self.assertIsNone(artifact.html_summary_fp)
obs = self.patch('/qiita_db/artifacts/1/',
headers=self.header,
data=arguments)
self.assertEqual(obs.code, 200)
- self.assertIsNotNone(qdb.artifact.Artifact(1).html_summary_fp)
+ self.assertIsNotNone(artifact.html_summary_fp)
+
+ # Correct argument with an HTML file and a support directory
+ fd, html_fp = mkstemp(suffix=".html")
+ close(fd)
+ self._clean_up_files.append(html_fp)
+ html_dir = mkdtemp()
+ self._clean_up_files.append(html_dir)
+ arguments = {'op': 'add', 'path': '/html_summary/',
+ 'value': dumps({'html': html_fp, 'dir': html_dir})}
+ obs = self.patch('/qiita_db/artifacts/1/',
+ headers=self.header,
+ data=arguments)
+ self.assertEqual(obs.code, 200)
+ self.assertIsNotNone(artifact.html_summary_fp)
+ html_dir = [fp for _, fp, fp_type in artifact.filepaths
+ if fp_type == 'html_summary_dir']
+ self.assertEqual(len(html_dir), 1)
# Wrong operation
arguments = {'op': 'wrong', 'path': '/html_summary/',
- 'value': self.html_fp}
+ 'value': html_fp}
obs = self.patch('/qiita_db/artifacts/1/',
headers=self.header,
data=arguments)
@@ -103,7 +155,7 @@ def test_patch(self):
# Wrong path parameter
arguments = {'op': 'add', 'path': '/wrong/',
- 'value': self.html_fp}
+ 'value': html_fp}
obs = self.patch('/qiita_db/artifacts/1/',
headers=self.header,
data=arguments)
@@ -112,7 +164,7 @@ def test_patch(self):
# Wrong value parameter
arguments = {'op': 'add', 'path': '/html_summary/',
- 'value': self.html_fp}
+ 'value': html_fp}
obs = self.patch('/qiita_db/artifacts/1/',
headers=self.header,
data=arguments)
@@ -180,6 +232,27 @@ def test_post(self):
self._clean_up_files.extend([fp for _, fp, _ in a.filepaths])
self.assertEqual(a.name, "New test artifact")
+ def test_post_analysis(self):
+ fd, fp = mkstemp(suffix='_table.biom')
+ close(fd)
+ with biom_open(fp, 'w') as f:
+ et.to_hdf5(f, "test")
+ self._clean_up_files.append(fp)
+
+ data = {'filepaths': dumps([(fp, 'biom')]),
+ 'type': "BIOM",
+ 'name': "New biom artifact",
+ 'analysis': 1,
+ 'data_type': '16S'}
+ obs = self.post('/apitest/artifact/', headers=self.header, data=data)
+ self.assertEqual(obs.code, 200)
+ obs = loads(obs.body)
+ self.assertEqual(obs.keys(), ['artifact'])
+
+ a = qdb.artifact.Artifact(obs['artifact'])
+ self._clean_up_files.extend([afp for _, afp, _ in a.filepaths])
+ self.assertEqual(a.name, "New biom artifact")
+
def test_post_error(self):
data = {'filepaths': dumps([('Do not exist', 'raw_forward_seqs')]),
'type': "FASTQ",
diff --git a/qiita_db/handlers/tests/test_plugin.py b/qiita_db/handlers/tests/test_plugin.py
index 036a58f32..0fb108377 100644
--- a/qiita_db/handlers/tests/test_plugin.py
+++ b/qiita_db/handlers/tests/test_plugin.py
@@ -56,7 +56,9 @@ def test_get(self):
'for performing microbiome analysis from raw DNA '
'sequencing data',
'commands': ['Split libraries FASTQ', 'Split libraries',
- 'Pick closed-reference OTUs'],
+ 'Pick closed-reference OTUs', 'Summarize Taxa',
+ 'Beta Diversity', 'Alpha Rarefaction',
+ 'Single Rarefaction'],
'publications': [{'DOI': '10.1038/nmeth.f.303',
'PubMed': '20383131'}],
'default_workflows': ['FASTQ upstream workflow',
@@ -74,9 +76,12 @@ def test_post(self):
'description': 'Command added for testing',
'required_parameters': dumps(
{'in_data': ['artifact:["FASTA"]', None]}),
- 'optional_parameters': dumps({'param1': ['string', ''],
- 'param2': ['float', '1.5'],
- 'param3': ['boolean', 'True']}),
+ 'optional_parameters': dumps(
+ {'param1': ['string', ''],
+ 'param2': ['float', '1.5'],
+ 'param3': ['boolean', 'True'],
+ 'param4': ['mchoice:["opt1", "opt2", "opt3"]',
+ dumps(['opt1', 'opt2'])]}),
'outputs': dumps({'out1': 'BIOM'}),
'default_parameter_sets': dumps(
{'dflt1': {'param1': 'test',
@@ -88,6 +93,25 @@ def test_post(self):
self.assertEqual(obs.code, 200)
obs = _get_command('QIIME', '1.9.1', 'New Command')
self.assertEqual(obs.name, 'New Command')
+ self.assertFalse(obs.analysis_only)
+
+ # Create a new command that is analysis only
+ data = {
+ 'name': 'New analysis command',
+ 'description': 'Analysis command added for testing',
+ 'required_parameters': dumps(
+ {'in_data': ['artifact:["BIOM"]', None]}),
+ 'optional_parameters': dumps({'param1': ['string', 'default']}),
+ 'outputs': dumps({'outtable': 'BIOM'}),
+ 'default_parameter_sets': dumps({'dflt1': {'param1': 'test'}}),
+ 'analysis_only': True
+ }
+ obs = self.post('/qiita_db/plugins/QIIME/1.9.1/commands/', data=data,
+ headers=self.header)
+ self.assertEqual(obs.code, 200)
+ obs = _get_command('QIIME', '1.9.1', 'New analysis command')
+ self.assertEqual(obs.name, 'New analysis command')
+ self.assertTrue(obs.analysis_only)
class CommandHandlerTests(OauthTestingBase):
diff --git a/qiita_db/investigation.py b/qiita_db/investigation.py
index 9f04ada0f..a2c07b1c2 100644
--- a/qiita_db/investigation.py
+++ b/qiita_db/investigation.py
@@ -23,7 +23,7 @@
REQUIRED_KEYS = {"name", "description", "contact_person"}
-class Investigation(qdb.base.QiitaStatusObject):
+class Investigation(qdb.base.QiitaObject):
"""
Study object to access to the Qiita Study information
diff --git a/qiita_db/job.py b/qiita_db/job.py
deleted file mode 100644
index 2cb016717..000000000
--- a/qiita_db/job.py
+++ /dev/null
@@ -1,611 +0,0 @@
-r"""
-Data objects (:mod: `qiita_db.data`)
-====================================
-
-..currentmodule:: qiita_db.data
-
-This module provides functionality for creating, running, and storing results
-of jobs in an analysis. It also provides the ability to query what commmands
-are available for jobs, as well as the options for these commands.
-
-Classes
--------
-
-..autosummary::
- :toctree: generated/
-
- Job
- Command
-"""
-# -----------------------------------------------------------------------------
-# Copyright (c) 2014--, The Qiita Development Team.
-#
-# Distributed under the terms of the BSD 3-clause License.
-#
-# The full license is in the file LICENSE, distributed with this software.
-# -----------------------------------------------------------------------------
-from __future__ import division
-from json import loads
-from os.path import join, relpath
-from glob import glob
-from functools import partial
-from collections import defaultdict
-
-import qiita_db as qdb
-
-
-class Job(qdb.base.QiitaStatusObject):
- """
- Job object to access to the Qiita Job information
-
- Attributes
- ----------
- datatype
- command
- options
- results
- error
-
- Methods
- -------
- set_error
- add_results
- """
- _table = "job"
-
- def _lock_job(self):
- """Raises QiitaDBStatusError if study is public"""
- if self.check_status(("completed", "error")):
- raise qdb.exceptions.QiitaDBStatusError(
- "Can't change status of finished job!")
-
- def _status_setter_checks(self):
- r"""Perform a check to make sure not setting status away from completed
- or errored
- """
- self._lock_job()
-
- @staticmethod
- def get_commands():
- """returns commands available with the options as well
-
- Returns
- -------
- list of command objects
- """
- return Command.create_list()
-
- @classmethod
- def exists(cls, datatype, command, options, analysis,
- input_file_reference, input_file_software_command,
- return_existing=False):
- """Checks if the given job already exists
-
- Parameters
- ----------
- datatype : str
- Datatype the job is operating on
- command : str
- The name of the command run on the data
- options : dict
- Options for the command in the format {option: value}
- analysis : Analysis object
- The analysis the job will be attached to on creation
- input_file_reference : Reference object
- The reference object used to create the input file
- input_file_software_command: Software.Command object
- The software command object used to create the input file
- return_existing : bool, optional
- If True, function will return the instatiated Job object for the
- matching job. Default False
-
- Returns
- -------
- bool
- Whether the job exists or not
- Job or None, optional
- If return_existing is True, the Job object of the matching job or
- None if none exists
- """
- with qdb.sql_connection.TRN:
- # check passed arguments and grab analyses for matching jobs
- datatype_id = qdb.util.convert_to_id(datatype, "data_type")
- sql = "SELECT command_id FROM qiita.command WHERE name = %s"
- qdb.sql_connection.TRN.add(sql, [command])
- command_id = qdb.sql_connection.TRN.execute_fetchlast()
-
- opts_json = qdb.util.params_dict_to_json(options)
- sql = """SELECT DISTINCT analysis_id, job_id
- FROM qiita.analysis_job
- JOIN qiita.{0} USING (job_id)
- WHERE data_type_id = %s
- AND command_id = %s
- AND options = %s
- AND input_file_reference_id = %s
- AND input_file_software_command_id = %s
- """.format(cls._table)
- rid = (input_file_reference.id
- if input_file_reference is not None else None)
- cid = (input_file_software_command.id
- if input_file_software_command is not None else None)
- qdb.sql_connection.TRN.add(
- sql, [datatype_id, command_id, opts_json, rid, cid])
- analyses = qdb.sql_connection.TRN.execute_fetchindex()
-
- if not analyses and return_existing:
- # stop looking since we have no possible matches
- return False, None
- elif not analyses:
- return False
-
- # build the samples dict as list of samples keyed to
- # their artifact_id
- sql = """SELECT artifact_id, array_agg(
- sample_id ORDER BY sample_id)
- FROM qiita.analysis_sample
- WHERE analysis_id = %s GROUP BY artifact_id"""
- qdb.sql_connection.TRN.add(sql, [analysis.id])
- samples = dict(qdb.sql_connection.TRN.execute_fetchindex())
-
- # check passed analyses' samples dict against all found analyses
- matched_job = None
- for aid, jid in analyses:
- # build the samples dict for a found analysis
- qdb.sql_connection.TRN.add(sql, [aid])
- comp_samples = dict(
- qdb.sql_connection.TRN.execute_fetchindex())
-
- # compare samples and stop checking if a match is found
- matched_samples = samples == comp_samples
- if matched_samples:
- matched_job = jid
- break
-
- if return_existing:
- return matched_samples, (cls(matched_job) if matched_job
- else None)
-
- return matched_samples
-
- @classmethod
- def delete(cls, jobid):
- """Removes a job and all files attached to it
-
- Parameters
- ----------
- jobid : int
- ID of the job to delete
-
- Notes
- -----
- This function will remove a job from all analyses it is attached to in
- analysis_job table, as well as the job itself from the job table. All
- files and references to files for the job will be removed from the
- filepath and job_results_filepath tables. All the job's files on the
- filesystem will also be removed.
- """
- with qdb.sql_connection.TRN:
- # store filepath info for later use
- sql = """SELECT filepath, filepath_id
- FROM qiita.filepath
- JOIN qiita.job_results_filepath USING (filepath_id)
- WHERE job_id = %s"""
- args = [jobid]
- qdb.sql_connection.TRN.add(sql, args)
- filepaths = qdb.sql_connection.TRN.execute_fetchindex()
-
- # remove fiepath links in DB
- sql = "DELETE FROM qiita.job_results_filepath WHERE job_id = %s"
- qdb.sql_connection.TRN.add(sql, args)
-
- sql = "DELETE FROM qiita.filepath WHERE filepath_id IN %s"
- qdb.sql_connection.TRN.add(sql, [tuple(fp[1] for fp in filepaths)])
-
- # remove job
- sql = "DELETE FROM qiita.analysis_job WHERE job_id = %s"
- qdb.sql_connection.TRN.add(sql, args)
- sql = "DELETE FROM qiita.collection_job WHERE job_id = %s"
- qdb.sql_connection.TRN.add(sql, args)
- sql = "DELETE FROM qiita.job WHERE job_id = %s"
- qdb.sql_connection.TRN.add(sql, args)
-
- qdb.sql_connection.TRN.execute()
-
- @classmethod
- def create(cls, datatype, command, options, analysis,
- input_file_reference, input_file_software_command,
- return_existing=False):
- """Creates a new job on the database
-
- Parameters
- ----------
- datatype : str
- The datatype in which this job applies
- command : str
- The name of the command executed in this job
- analysis : Analysis object
- The analysis which this job belongs to
- input_file_reference : Reference object
- The reference object used to create the input file
- input_file_software_command: Software.Command object
- The software command object used to create the input file
- return_existing : bool, optional
- If True, returns an instantiated Job object pointing to an already
- existing job with the given parameters. Default False
-
- Returns
- -------
- Job object
- The newly created job
-
- Raises
- ------
- QiitaDBDuplicateError
- return_existing is False and an exact duplicate of the job already
- exists in the DB.
- """
- with qdb.sql_connection.TRN:
- analysis_sql = """INSERT INTO qiita.analysis_job
- (analysis_id, job_id) VALUES (%s, %s)"""
- exists, job = cls.exists(datatype, command, options, analysis,
- input_file_reference,
- input_file_software_command,
- return_existing=True)
-
- if exists:
- if return_existing:
- # add job to analysis
- qdb.sql_connection.TRN.add(
- analysis_sql, [analysis.id, job.id])
- qdb.sql_connection.TRN.execute()
- return job
- else:
- raise qdb.exceptions.QiitaDBDuplicateError(
- "Job", "datatype: %s, command: %s, options: %s, "
- "analysis: %s"
- % (datatype, command, options, analysis.id))
-
- # Get the datatype and command ids from the strings
- datatype_id = qdb.util.convert_to_id(datatype, "data_type")
- sql = "SELECT command_id FROM qiita.command WHERE name = %s"
- qdb.sql_connection.TRN.add(sql, [command])
- command_id = qdb.sql_connection.TRN.execute_fetchlast()
- opts_json = qdb.util.params_dict_to_json(options)
-
- # Create the job and return it
- sql = """INSERT INTO qiita.{0} (data_type_id, job_status_id,
- command_id, options,
- input_file_reference_id,
- input_file_software_command_id)
- VALUES (%s, %s, %s, %s, %s, %s)
- RETURNING job_id""".format(cls._table)
- rid = (input_file_reference.id
- if input_file_reference is not None else None)
- cid = (input_file_software_command.id
- if input_file_software_command is not None else None)
- qdb.sql_connection.TRN.add(
- sql, [datatype_id, 1, command_id, opts_json, rid, cid])
- job_id = qdb.sql_connection.TRN.execute_fetchlast()
-
- # add job to analysis
- qdb.sql_connection.TRN.add(analysis_sql, [analysis.id, job_id])
- qdb.sql_connection.TRN.execute()
-
- return cls(job_id)
-
- @property
- def datatype(self):
- with qdb.sql_connection.TRN:
- sql = """SELECT data_type
- FROM qiita.data_type
- WHERE data_type_id = (
- SELECT data_type_id
- FROM qiita.{0}
- WHERE job_id = %s)""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- return qdb.sql_connection.TRN.execute_fetchlast()
-
- @property
- def command(self):
- """Returns the command of the job as (name, command)
-
- Returns
- -------
- str
- command run by the job
- """
- with qdb.sql_connection.TRN:
- sql = """SELECT name, command
- FROM qiita.command
- WHERE command_id = (
- SELECT command_id
- FROM qiita.{0}
- WHERE job_id = %s)""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- # We only want the first row (the only one present)
- return qdb.sql_connection.TRN.execute_fetchindex()[0]
-
- @property
- def options(self):
- """Options used in the job
-
- Returns
- -------
- dict
- options in the format {option: setting}
- """
- with qdb.sql_connection.TRN:
- sql = """SELECT options FROM qiita.{0}
- WHERE job_id = %s""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- db_opts = qdb.sql_connection.TRN.execute_fetchlast()
- opts = loads(db_opts) if db_opts else {}
-
- sql = """SELECT command, output
- FROM qiita.command
- WHERE command_id = (
- SELECT command_id
- FROM qiita.{0}
- WHERE job_id = %s)""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- db_comm = qdb.sql_connection.TRN.execute_fetchindex()[0]
-
- out_opt = loads(db_comm[1])
- _, mp, _ = qdb.util.get_mountpoint('job', retrieve_subdir=True)[0]
- join_f = partial(join, mp)
- for k in out_opt:
- opts[k] = join_f("%s_%s_%s" % (self._id, db_comm[0],
- k.strip("-")))
- return opts
-
- @options.setter
- def options(self, opts):
- """ Sets the options for the job
-
- Parameters
- ----------
- opts: dict
- The options for the command in format {option: value}
- """
- with qdb.sql_connection.TRN:
- # make sure job is editable
- self._lock_job()
-
- # JSON the options dictionary
- opts_json = qdb.util.params_dict_to_json(opts)
- # Add the options to the job
- sql = """UPDATE qiita.{0} SET options = %s
- WHERE job_id = %s""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [opts_json, self._id])
- qdb.sql_connection.TRN.execute()
-
- @property
- def results(self):
- """List of job result filepaths
-
- Returns
- -------
- list
- Filepaths to the result files
- """
- # Select results filepaths and filepath types from the database
- with qdb.sql_connection.TRN:
- _, basedir = qdb.util.get_mountpoint('job')[0]
- sql = """SELECT filepath, filepath_type
- FROM qiita.filepath
- JOIN qiita.filepath_type USING (filepath_type_id)
- JOIN qiita.job_results_filepath USING (filepath_id)
- WHERE job_id = %s"""
- qdb.sql_connection.TRN.add(sql, [self._id])
- results = qdb.sql_connection.TRN.execute_fetchindex()
-
- def add_html(basedir, check_dir, result_fps):
- for res in glob(join(basedir, check_dir, "*.htm")) + \
- glob(join(basedir, check_dir, "*.html")):
- result_fps.append(relpath(res, basedir))
-
- # create new list, with relative paths from db base
- result_fps = []
- for fp in results:
- if fp[1] == "directory":
- # directory, so all html files in it are results
- # first, see if we have any in the main directory
- add_html(basedir, fp[0], result_fps)
- # now do all subdirectories
- add_html(basedir, join(fp[0], "*"), result_fps)
- else:
- # result is exact filepath given
- result_fps.append(fp[0])
- return result_fps
-
- @property
- def error(self):
- """String with an error message, if the job failed
-
- Returns
- -------
- str or None
- error message/traceback for a job, or None if none exists
- """
- with qdb.sql_connection.TRN:
- sql = "SELECT log_id FROM qiita.{0} WHERE job_id = %s".format(
- self._table)
- qdb.sql_connection.TRN.add(sql, [self._id])
- logging_id = qdb.sql_connection.TRN.execute_fetchlast()
- return (qdb.logger.LogEntry(logging_id)
- if logging_id is not None else None)
-
-# --- Functions ---
- def set_error(self, msg):
- """Logs an error for the job
-
- Parameters
- ----------
- msg : str
- Error message/stacktrace if available
- """
- with qdb.sql_connection.TRN:
- log_entry = qdb.logger.LogEntry.create(
- 'Runtime', msg, info={'job': self._id})
- self._lock_job()
-
- err_id = qdb.util.convert_to_id('error', 'job_status', 'status')
- # attach the error to the job and set to error
- sql = """UPDATE qiita.{0} SET log_id = %s, job_status_id = %s
- WHERE job_id = %s""".format(self._table)
- qdb.sql_connection.TRN.add(sql, [log_entry.id, err_id, self._id])
- qdb.sql_connection.TRN.execute()
-
- def add_results(self, results):
- """Adds a list of results to the results
-
- Parameters
- ----------
- results : list of tuples
- filepath information to add to job, in format
- [(filepath, type), ...]
- Where type is the filepath type of the filepath passed
-
- Notes
- -----
- Curently available file types are:
- biom, directory, plain_text
- """
- with qdb.sql_connection.TRN:
- self._lock_job()
- # convert all file type text to file type ids
- res_ids = [(fp, qdb.util.convert_to_id(fptype, "filepath_type"))
- for fp, fptype in results]
- file_ids = qdb.util.insert_filepaths(
- res_ids, self._id, self._table, "filepath", move_files=False)
-
- # associate filepaths with job
- sql = """INSERT INTO qiita.{0}_results_filepath
- (job_id, filepath_id)
- VALUES (%s, %s)""".format(self._table)
- qdb.sql_connection.TRN.add(
- sql, [[self._id, fid] for fid in file_ids], many=True)
- qdb.sql_connection.TRN.execute()
-
-
-class Command(object):
- """Holds all information on the commands available
-
- This will be an in-memory representation because the command table is
- considerably more static than other objects tables, changing only with new
- QIIME releases.
-
- Attributes
- ----------
- name
- command
- input_opts
- required_opts
- optional_opts
- output_opts
- """
- @classmethod
- def create_list(cls):
- """Creates list of all available commands
-
- Returns
- -------
- list of Command objects
- """
- with qdb.sql_connection.TRN:
- qdb.sql_connection.TRN.add("SELECT * FROM qiita.command")
- commands = qdb.sql_connection.TRN.execute_fetchindex()
- # create the list of command objects
- return [cls(c["name"], c["command"], c["input"], c["required"],
- c["optional"], c["output"]) for c in commands]
-
- @classmethod
- def get_commands_by_datatype(cls, datatypes=None):
- """Returns the commands available for all or a subset of the datatypes
-
- Parameters
- ----------
- datatypes : list of str, optional
- List of the datatypes to get commands for. Default is all datatypes
-
- Returns
- -------
- dict of lists of Command objects
- Returns commands in the format {datatype: [com name1, com name2]}
-
- Notes
- -----
- If no datatypes are passed, the function will default to returning all
- datatypes available.
- """
- with qdb.sql_connection.TRN:
- # get the ids of the datatypes to get commands for
- if datatypes is not None:
- datatype_info = [(qdb.util.convert_to_id(dt, "data_type"), dt)
- for dt in datatypes]
- else:
- sql = "SELECT data_type_id, data_type from qiita.data_type"
- qdb.sql_connection.TRN.add(sql)
- datatype_info = qdb.sql_connection.TRN.execute_fetchindex()
-
- commands = defaultdict(list)
- # get commands for each datatype
- sql = """SELECT C.*
- FROM qiita.command C
- JOIN qiita.command_data_type USING (command_id)
- WHERE data_type_id = %s"""
- for dt_id, dt in datatype_info:
- qdb.sql_connection.TRN.add(sql, [dt_id])
- comms = qdb.sql_connection.TRN.execute_fetchindex()
- for comm in comms:
- commands[dt].append(cls(comm["name"], comm["command"],
- comm["input"],
- comm["required"],
- comm["optional"],
- comm["output"]))
- return commands
-
- def __eq__(self, other):
- if type(self) != type(other):
- return False
- if self.name != other.name:
- return False
- if self.command != other.command:
- return False
- if self.input_opts != other.input_opts:
- return False
- if self.output_opts != other.output_opts:
- return False
- if self.required_opts != other.required_opts:
- return False
- if self.optional_opts != other.optional_opts:
- return False
- return True
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
- def __init__(self, name, command, input_opts, required_opts,
- optional_opts, output_opts):
- """Creates the command object
-
- Parameters:
- name : str
- Name of the command
- command: str
- python command to run
- input_opts : str
- JSON of input options for the command
- required_opts : str
- JSON of required options for the command
- optional_opts : str
- JSON of optional options for the command
- output_opts : str
- JSON of output options for the command
- """
- self.name = name
- self.command = command
- self.input_opts = loads(input_opts)
- self.required_opts = loads(required_opts)
- self.optional_opts = loads(optional_opts)
- self.output_opts = loads(output_opts)
diff --git a/qiita_db/meta_util.py b/qiita_db/meta_util.py
index 78bc8ee3f..1978ca5e4 100644
--- a/qiita_db/meta_util.py
+++ b/qiita_db/meta_util.py
@@ -97,15 +97,12 @@ def validate_filepath_access_by_user(user, filepath_id):
(SELECT array_agg(prep_template_id)
FROM qiita.prep_template_filepath
WHERE filepath_id = {0}) AS prep_info,
- (SELECT array_agg(job_id)
- FROM qiita.job_results_filepath
- WHERE filepath_id = {0}) AS job_results,
(SELECT array_agg(analysis_id)
FROM qiita.analysis_filepath
WHERE filepath_id = {0}) AS analysis""".format(filepath_id)
TRN.add(sql)
- arid, sid, pid, jid, anid = TRN.execute_fetchflatten()
+ arid, sid, pid, anid = TRN.execute_fetchflatten()
# artifacts
if arid:
@@ -114,8 +111,14 @@ def validate_filepath_access_by_user(user, filepath_id):
if artifact.visibility == 'public':
return True
else:
- # let's take the visibility via the Study
- return artifact.study.has_access(user)
+ study = artifact.study
+ if study:
+ # let's take the visibility via the Study
+ return artifact.study.has_access(user)
+ else:
+ analysis = artifact.analysis
+ return analysis in (
+ user.private_analyses | user.shared_analyses)
# sample info files
elif sid:
# the visibility of the sample info file is given by the
@@ -144,22 +147,13 @@ def validate_filepath_access_by_user(user, filepath_id):
return True
return False
# analyses
- elif anid or jid:
- if jid:
- # [0] cause we should only have 1
- sql = """SELECT analysis_id FROM qiita.analysis_job
- WHERE job_id = {0}""".format(jid[0])
- TRN.add(sql)
- aid = TRN.execute_fetchlast()
- else:
- aid = anid[0]
+ elif anid:
# [0] cause we should only have 1
+ aid = anid[0]
analysis = qdb.analysis.Analysis(aid)
- if analysis.status == 'public':
- return True
- else:
- return analysis in (
- user.private_analyses | user.shared_analyses)
+ return analysis in (
+ user.private_analyses | user.shared_analyses)
+ return False
def update_redis_stats():
diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py
index a4de41ad3..4bd340dfb 100644
--- a/qiita_db/metadata_template/base_metadata_template.py
+++ b/qiita_db/metadata_template/base_metadata_template.py
@@ -557,10 +557,7 @@ def _common_creation_steps(cls, md_template, obj_id):
table_name = cls._table_name(obj_id)
column_datatype = ["%s varchar" % col for col in headers]
sql = """CREATE TABLE qiita.{0} (
- sample_id varchar NOT NULL, {1},
- CONSTRAINT fk_{0} FOREIGN KEY (sample_id)
- REFERENCES qiita.study_sample (sample_id)
- ON UPDATE CASCADE
+ sample_id varchar NOT NULL, {1}
)""".format(table_name, ', '.join(column_datatype))
qdb.sql_connection.TRN.add(sql)
diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py
index 8e39d2ee6..82d72a79a 100644
--- a/qiita_db/metadata_template/test/test_prep_template.py
+++ b/qiita_db/metadata_template/test/test_prep_template.py
@@ -876,8 +876,6 @@ def _common_creation_checks(self, pt, fp_count):
# prep and qiime files have been created
filepaths = pt.get_filepaths()
self.assertEqual(len(filepaths), 2)
- self.assertEqual(filepaths[0][0], fp_count + 2)
- self.assertEqual(filepaths[1][0], fp_count + 1)
def test_create(self):
"""Creates a new PrepTemplate"""
@@ -932,7 +930,6 @@ def test_create_data_type_id(self):
def test_create_warning(self):
"""Warns if a required columns is missing for a given functionality
"""
- fp_count = qdb.util.get_count("qiita.filepath")
del self.metadata['barcode']
pt = npt.assert_warns(
qdb.exceptions.QiitaDBWarning,
@@ -1000,8 +997,6 @@ def test_create_warning(self):
# prep and qiime files have been created
filepaths = pt.get_filepaths()
self.assertEqual(len(filepaths), 2)
- self.assertEqual(filepaths[0][0], fp_count + 2)
- self.assertEqual(filepaths[1][0], fp_count + 1)
def test_create_investigation_type_error(self):
"""Create raises an error if the investigation_type does not exists"""
diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py
index bca13f35d..c9addf20e 100644
--- a/qiita_db/metadata_template/test/test_sample_template.py
+++ b/qiita_db/metadata_template/test/test_sample_template.py
@@ -1320,7 +1320,7 @@ def test_get_filepath(self):
# change based on time and the same functionality is being tested
# in data.py
exp_id = self.conn_handler.execute_fetchone(
- "SELECT count(1) FROM qiita.filepath")[0] + 1
+ "SELECT last_value FROM qiita.filepath_filepath_id_seq")[0] + 1
st = qdb.metadata_template.sample_template.SampleTemplate.create(
self.metadata, self.new_study)
self.assertEqual(st.get_filepaths()[0][0], exp_id)
diff --git a/qiita_db/portal.py b/qiita_db/portal.py
index 659a50bac..8ddebad65 100644
--- a/qiita_db/portal.py
+++ b/qiita_db/portal.py
@@ -91,14 +91,10 @@ def create(cls, portal, desc):
SELECT email FROM qiita.qiita_user
LOOP
INSERT INTO qiita.analysis
- (email, name, description, dflt,
- analysis_status_id)
- VALUES (eml, eml || '-dflt', 'dflt', true, 1)
+ (email, name, description, dflt)
+ VALUES (eml, eml || '-dflt', 'dflt', true)
RETURNING analysis_id INTO aid;
- INSERT INTO qiita.analysis_workflow (analysis_id, step)
- VALUES (aid, 2);
-
INSERT INTO qiita.analysis_portal
(analysis_id, portal_type_id)
VALUES (aid, pid);
@@ -162,9 +158,6 @@ def delete(portal):
DELETE FROM qiita.analysis_portal
WHERE analysis_id = aid;
- DELETE FROM qiita.analysis_workflow
- WHERE analysis_id = aid;
-
DELETE FROM qiita.analysis_sample
WHERE analysis_id = aid;
diff --git a/qiita_db/private.py b/qiita_db/private.py
new file mode 100644
index 000000000..78a286f51
--- /dev/null
+++ b/qiita_db/private.py
@@ -0,0 +1,74 @@
+# -----------------------------------------------------------------------------
+# Copyright (c) 2014--, The Qiita Development Team.
+#
+# Distributed under the terms of the BSD 3-clause License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# -----------------------------------------------------------------------------
+
+from json import dumps
+from sys import exc_info
+from time import sleep
+import traceback
+
+import qiita_db as qdb
+
+
+def build_analysis_files(job):
+ """Builds the files for an analysis
+
+ Parameters
+ ----------
+ job : qiita_db.processing_job.ProcessingJob
+ The processing job with the information for building the files
+ """
+ with qdb.sql_connection.TRN:
+ params = job.parameters.values
+ analysis_id = params['analysis']
+ merge_duplicated_sample_ids = params['merge_dup_sample_ids']
+ analysis = qdb.analysis.Analysis(analysis_id)
+ biom_files = analysis.build_files(merge_duplicated_sample_ids)
+
+ cmd = qdb.software.Command.get_validator('BIOM')
+ val_jobs = []
+ for dtype, biom_fp in biom_files:
+ validate_params = qdb.software.Parameters.load(
+ cmd, values_dict={'files': dumps({'biom': [biom_fp]}),
+ 'artifact_type': 'BIOM',
+ 'provenance': dumps({'job': job.id,
+ 'data_type': dtype}),
+ 'analysis': analysis_id})
+ val_jobs.append(qdb.processing_job.ProcessingJob.create(
+ analysis.owner, validate_params))
+
+ job._set_validator_jobs(val_jobs)
+
+ for j in val_jobs:
+ j.submit()
+ sleep(1)
+
+
+TASK_DICT = {'build_analysis_files': build_analysis_files}
+
+
+def private_task(job_id):
+ """Complets a Qiita private task
+
+ Parameters
+ ----------
+ job_id : str
+ The job id
+ """
+ if job_id == 'register':
+ # We don't need to do anything here if Qiita is registering plugins
+ return
+
+ job = qdb.processing_job.ProcessingJob(job_id)
+ job.update_heartbeat_state()
+ task_name = job.command.name
+
+ try:
+ TASK_DICT[task_name](job)
+ except Exception:
+ job.complete(False, error="Error executing private task: %s"
+ % ''.join(traceback.format_exception(*exc_info())))
diff --git a/qiita_db/processing_job.py b/qiita_db/processing_job.py
index 4e99d3b5b..66e176feb 100644
--- a/qiita_db/processing_job.py
+++ b/qiita_db/processing_job.py
@@ -69,6 +69,30 @@ def _job_submitter(job, cmd):
job.complete(False, error=error)
+def private_job_submitter(job_name, command, args):
+ """Submits a private job
+
+ Parameters
+ ----------
+ job_name : str
+ The name of the job
+ command : str
+ The private command to be executed
+ args : list of str
+ The arguments to the private command
+
+ Returns
+ -------
+ tuple of (bool, str)
+ Whether the submission succeeded and the error message, if any
+ """
+
+ cmd = "%s '%s' %s %s" % (qiita_config.private_launcher,
+ qiita_config.qiita_env, command,
+ ' '.join("'%s'" % a for a in args))
+ std_out, std_err, return_value = _system_call(cmd)
+ error = ""
+ if return_value != 0:
+ error = ("Can't submit private task '%s':\n"
+ "Std output:%s\nStd error: %s" % (command, std_out, std_err))
+ return (return_value == 0), error
+
+
class ProcessingJob(qdb.base.QiitaObject):
r"""Models a job that executes a command in a set of artifacts
@@ -352,30 +376,49 @@ def release(self):
qdb.sql_connection.TRN.add(sql, [self.id])
a_info = qdb.sql_connection.TRN.execute_fetchlast()
- atype = a_info['artifact_type']
- filepaths = a_info['filepaths']
provenance = loads(self.parameters.values['provenance'])
job = ProcessingJob(provenance['job'])
- parents = job.input_artifacts
- params = job.parameters
+ if 'data_type' in a_info:
+ # This job is resulting from a private job
+ parents = None
+ params = None
+ cmd_out_id = None
+ data_type = a_info['data_type']
+ analysis = qdb.analysis.Analysis(
+ job.parameters.values['analysis'])
+ a_info = a_info['artifact_data']
+ else:
+ # This job is resulting from a plugin job
+ parents = job.input_artifacts
+ params = job.parameters
+ cmd_out_id = provenance['cmd_out_id']
+ analysis = None
+ data_type = None
# Create the artifact
+ atype = a_info['artifact_type']
+ filepaths = a_info['filepaths']
a = qdb.artifact.Artifact.create(
filepaths, atype, parents=parents,
- processing_parameters=params)
+ processing_parameters=params,
+ analysis=analysis, data_type=data_type)
- cmd_out_id = provenance['cmd_out_id']
- mapping = {cmd_out_id: a.id}
self._set_status('success')
+ mapping = {}
+ if cmd_out_id is not None:
+ mapping = {cmd_out_id: a.id}
+
return mapping
def release_validators(self):
"""Allows all the validator job spawned by this job to complete"""
with qdb.sql_connection.TRN:
- if self.command.software.type != 'artifact transformation':
+ if self.command.software.type not in ('artifact transformation',
+ 'private'):
raise qdb.exceptions.QiitaDBOperationNotPermittedError(
- "Only artifact transformation jobs can release validators")
+ "Only artifact transformation and private jobs can "
+ "release validators")
# Check if all the validators are ready by checking that there is
# no validator processing job whose status is not waiting
@@ -405,16 +448,17 @@ def release_validators(self):
vjob = ProcessingJob(jid)
mapping.update(vjob.release())
- sql = """INSERT INTO
- qiita.artifact_output_processing_job
- (artifact_id, processing_job_id,
- command_output_id)
- VALUES (%s, %s, %s)"""
- sql_args = [[aid, self.id, outid]
- for outid, aid in viewitems(mapping)]
- qdb.sql_connection.TRN.add(sql, sql_args, many=True)
-
- self._update_and_launch_children(mapping)
+ if mapping:
+ sql = """INSERT INTO
+ qiita.artifact_output_processing_job
+ (artifact_id, processing_job_id,
+ command_output_id)
+ VALUES (%s, %s, %s)"""
+ sql_args = [[aid, self.id, outid]
+ for outid, aid in viewitems(mapping)]
+ qdb.sql_connection.TRN.add(sql, sql_args, many=True)
+
+ self._update_and_launch_children(mapping)
self._set_status('success')
else:
self.step = "Validating outputs (%d remaining)" % remaining
@@ -444,6 +488,9 @@ def _complete_artifact_definition(self, artifact_data):
# The artifact is a result from a previous job
provenance = loads(job_params['provenance'])
job = ProcessingJob(provenance['job'])
+ if provenance.get('data_type') is not None:
+ artifact_data = {'data_type': provenance['data_type'],
+ 'artifact_data': artifact_data}
sql = """UPDATE qiita.processing_job_validator
SET artifact_info = %s
@@ -455,11 +502,27 @@ def _complete_artifact_definition(self, artifact_data):
self._set_status('waiting')
job.release_validators()
else:
- # The artifact is uploaded by the user
- pt = qdb.metadata_template.prep_template.PrepTemplate(
- job_params['template'])
+ # The artifact is uploaded by the user or is the initial
+ # artifact of an analysis
+ if ('analysis' in job_params and
+ job_params['analysis'] is not None):
+ pt = None
+ an = qdb.analysis.Analysis(job_params['analysis'])
+ sql = """SELECT data_type
+ FROM qiita.analysis_processing_job
+ WHERE analysis_id = %s
+ AND processing_job_id = %s"""
+ qdb.sql_connection.TRN.add(sql, [an.id, self.id])
+ data_type = qdb.sql_connection.TRN.execute_fetchlast()
+ else:
+ pt = qdb.metadata_template.prep_template.PrepTemplate(
+ job_params['template'])
+ an = None
+ data_type = None
+
qdb.artifact.Artifact.create(
- filepaths, atype, prep_template=pt)
+ filepaths, atype, prep_template=pt, analysis=an,
+ data_type=data_type)
self._set_status('success')
def _complete_artifact_transformation(self, artifacts_data):
@@ -505,11 +568,21 @@ def _complete_artifact_transformation(self, artifacts_data):
templates = set()
for artifact in self.input_artifacts:
templates.update(pt.id for pt in artifact.prep_templates)
+ template = None
+ analysis = None
if len(templates) > 1:
raise qdb.exceptions.QiitaDBError(
"Currently only single prep template "
"is allowed, found %d" % len(templates))
- template = templates.pop()
+ elif len(templates) == 1:
+ template = templates.pop()
+ else:
+ # In this case we have 0 templates, which means this artifact
+ # is being generated in the analysis pipeline. All the artifacts
+ # in the analysis pipeline belong to the same analysis, so we
+ # can simply ask the first input artifact for its analysis
+ analysis = self.input_artifacts[0].analysis.id
# Once the validate job completes, it needs to know if it has
# been generated from a command (and how) or if it has been
@@ -530,6 +603,7 @@ def _complete_artifact_transformation(self, artifacts_data):
cmd, values_dict={'files': dumps(filepaths),
'artifact_type': atype,
'template': template,
+ 'analysis': analysis,
'provenance': dumps(provenance)})
validator_jobs.append(
ProcessingJob.create(self.user, validate_params))
@@ -1134,7 +1208,16 @@ def _raise_if_not_in_construction(self):
WHERE processing_job_workflow_id = %s"""
qdb.sql_connection.TRN.add(sql, [self.id])
res = qdb.sql_connection.TRN.execute_fetchflatten()
- if len(res) != 1 or res[0] != 'in_construction':
+ # If the above SQL query returns a single value and that value
+ # is different from 'in_construction', all the jobs in the
+ # workflow share a status other than 'in_construction', so we
+ # raise the error. If the query returns more than one value
+ # (len(res) > 1), some jobs have already been submitted for
+ # processing, so the workflow is no longer in construction.
+ # If the query returns no values, the workflow has no jobs yet,
+ # which means it is still in construction.
+ if (len(res) == 1 and res[0] != 'in_construction') or len(res) > 1:
# The workflow is no longer in construction, raise an error
raise qdb.exceptions.QiitaDBOperationNotPermittedError(
"Workflow not in construction")
diff --git a/qiita_db/software.py b/qiita_db/software.py
index 7335e6100..ce147afcb 100644
--- a/qiita_db/software.py
+++ b/qiita_db/software.py
@@ -44,7 +44,8 @@ class Command(qdb.base.QiitaObject):
_table = "software_command"
@classmethod
- def get_commands_by_input_type(cls, artifact_types, active_only=True):
+ def get_commands_by_input_type(cls, artifact_types, active_only=True,
+ exclude_analysis=True):
"""Returns the commands that can process the given artifact types
Parameters
@@ -54,6 +55,8 @@ def get_commands_by_input_type(cls, artifact_types, active_only=True):
active_only : bool, optional
If True, return only active commands, otherwise return all commands
Default: True
+ exclude_analysis : bool, optional
+ If True, return only commands that are not part of the analysis
+ pipeline. Default: True
Returns
-------
@@ -70,6 +73,8 @@ def get_commands_by_input_type(cls, artifact_types, active_only=True):
WHERE artifact_type IN %s"""
if active_only:
sql += " AND active = True"
+ if exclude_analysis:
+ sql += " AND is_analysis = False"
qdb.sql_connection.TRN.add(sql, [tuple(artifact_types)])
for c_id in qdb.sql_connection.TRN.execute_fetchflatten():
yield cls(c_id)
@@ -191,7 +196,8 @@ def exists(cls, software, name):
return qdb.sql_connection.TRN.execute_fetchlast()
@classmethod
- def create(cls, software, name, description, parameters, outputs=None):
+ def create(cls, software, name, description, parameters, outputs=None,
+ analysis_only=False):
r"""Creates a new command in the system
The supported types for the parameters are:
@@ -222,6 +228,9 @@ def create(cls, software, name, description, parameters, outputs=None):
outputs : dict, optional
The description of the outputs that this command generated. The
format is: {output_name: artifact_type}
+ analysis_only : bool, optional
+ If True, the command will only be available in the analysis
+ pipeline. Default: False.
Returns
-------
@@ -263,18 +272,27 @@ def create(cls, software, name, description, parameters, outputs=None):
ptype, dflt = vals
# Check that the type is one of the supported types
supported_types = ['string', 'integer', 'float', 'reference',
- 'boolean', 'prep_template']
+ 'boolean', 'prep_template', 'analysis']
if ptype not in supported_types and not ptype.startswith(
- ('choice', 'artifact')):
- supported_types.extend(['choice', 'artifact'])
+ ('choice', 'mchoice', 'artifact')):
+ supported_types.extend(['choice', 'mchoice', 'artifact'])
raise qdb.exceptions.QiitaDBError(
"Unsupported parameters type '%s' for parameter %s. "
"Supported types are: %s"
% (ptype, pname, ', '.join(supported_types)))
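+ # For illustration (hypothetical parameter definitions, not part
+ # of the patch), the validation above and below accepts entries
+ # such as:
+ #     'depth': ['integer', None]
+ #     'metric': ['choice:["jaccard","bray_curtis"]', 'jaccard']
+ #     'metrics': ['mchoice:["chao1","shannon"]', ['chao1']]
+ #     'analysis': ['analysis', None]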
- if ptype.startswith('choice') and dflt is not None:
- choices = loads(ptype.split(':')[1])
- if dflt not in choices:
+ if ptype.startswith(('choice', 'mchoice')) and dflt is not None:
+ choices = set(loads(ptype.split(':')[1]))
+ dflt_val = dflt
+ if ptype.startswith('choice'):
+ # In the choice case, the dflt value is a single string;
+ # wrap it in a list so we can use the issuperset call below
+ dflt_val = [dflt_val]
+ else:
+ # jsonize the list to store it in the DB
+ dflt = dumps(dflt)
+ if not choices.issuperset(dflt_val):
raise qdb.exceptions.QiitaDBError(
"The default value '%s' for the parameter %s is not "
"listed in the available choices: %s"
@@ -297,10 +315,10 @@ def create(cls, software, name, description, parameters, outputs=None):
% (software.id, name))
# Add the command to the DB
sql = """INSERT INTO qiita.software_command
- (name, software_id, description)
- VALUES (%s, %s, %s)
+ (name, software_id, description, is_analysis)
+ VALUES (%s, %s, %s, %s)
RETURNING command_id"""
- sql_params = [name, software.id, description]
+ sql_params = [name, software.id, description, analysis_only]
qdb.sql_connection.TRN.add(sql, sql_params)
c_id = qdb.sql_connection.TRN.execute_fetchlast()
@@ -319,6 +337,7 @@ def create(cls, software, name, description, parameters, outputs=None):
sql_type = """INSERT INTO qiita.parameter_artifact_type
(command_parameter_id, artifact_type_id)
VALUES (%s, %s)"""
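+ # Note: supported_types is re-used here to collect the artifact_type
+ # ids accepted by this command's artifact parameters; it is unrelated
+ # to the list of parameter type names used during validation above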
+ supported_types = []
for pname, p_type, atypes in sql_artifact_params:
sql_params = [c_id, pname, p_type, True, None]
qdb.sql_connection.TRN.add(sql, sql_params)
@@ -327,6 +346,30 @@ def create(cls, software, name, description, parameters, outputs=None):
[pid, qdb.util.convert_to_id(at, 'artifact_type')]
for at in atypes]
qdb.sql_connection.TRN.add(sql_type, sql_params, many=True)
+ supported_types.extend([atid for _, atid in sql_params])
+
+ # If the software type is 'artifact definition', there are a couple
+ # of extra steps
+ if software.type == 'artifact definition':
+ # If supported types is not empty, link the software with these
+ # types
+ if supported_types:
+ sql = """INSERT INTO qiita.software_artifact_type
+ (software_id, artifact_type_id)
+ VALUES (%s, %s)"""
+ sql_params = [[software.id, atid]
+ for atid in supported_types]
+ qdb.sql_connection.TRN.add(sql, sql_params, many=True)
+ # If this is the validate command, we need to add the
+ # provenance parameter. This is used internally, that's why
+ # we are adding it here
+ if name == 'Validate':
+ sql = """INSERT INTO qiita.command_parameter
+ (command_id, parameter_name, parameter_type,
+ required, default_value)
+ VALUES (%s, 'provenance', 'string', 'False', NULL)
+ """
+ qdb.sql_connection.TRN.add(sql, [c_id])
# Add the outputs to the command
if outputs:
@@ -445,7 +488,17 @@ def optional_parameters(self):
WHERE command_id = %s AND required = false"""
qdb.sql_connection.TRN.add(sql, [self.id])
res = qdb.sql_connection.TRN.execute_fetchindex()
- return {pname: [ptype, dflt] for pname, ptype, dflt in res}
+
+ # Define a function to load the json-encoded default value when
+ # ptype is multiple choice. Inlining it in the dict comprehension
+ # below as a one-liner conditional made the code hard to read.
+ def dflt_fmt(dflt, ptype):
+ if ptype.startswith('mchoice'):
+ return loads(dflt)
+ return dflt
+
+ return {pname: [ptype, dflt_fmt(dflt, ptype)]
+ for pname, ptype, dflt in res}
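+ # For example, a row ('sort', 'bool', 'False') is returned as
+ # {'sort': ['bool', 'False']}, while a row with ptype
+ # 'mchoice:["chao1","shannon"]' and stored default '["chao1"]' is
+ # returned with its default decoded: ['mchoice:[...]', ['chao1']]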
@property
def default_parameter_sets(self):
@@ -508,6 +561,22 @@ def activate(self):
qdb.sql_connection.TRN.add(sql, [True, self.id])
return qdb.sql_connection.TRN.execute()
+ @property
+ def analysis_only(self):
+ """Returns if the command is an analysis-only command
+
+ Returns
+ -------
+ bool
+ Whether the command is analysis only or not
+ """
+ with qdb.sql_connection.TRN:
+ sql = """SELECT is_analysis
+ FROM qiita.software_command
+ WHERE command_id = %s"""
+ qdb.sql_connection.TRN.add(sql, [self.id])
+ return qdb.sql_connection.TRN.execute_fetchlast()
+
class Software(qdb.base.QiitaObject):
r"""A software package available in the system
diff --git a/qiita_db/support_files/patches/54.sql b/qiita_db/support_files/patches/54.sql
new file mode 100644
index 000000000..c982566ad
--- /dev/null
+++ b/qiita_db/support_files/patches/54.sql
@@ -0,0 +1,123 @@
+-- Jan 5, 2017
+-- Move the analysis to the plugin system. This is a major rewrite of the
+-- database backend that supports the analysis pipeline.
+-- After exploring the data in the database, we realized that
+-- there are a lot of inconsistencies in the data. Unfortunately, this
+-- makes the process of transferring the data from the old structure
+-- to the new one a bit more challenging, as we will need to handle
+-- different special cases. Furthermore, not all the information needed is
+-- present in the database, since it requires checking BIOM files. For these
+-- reasons, the vast majority of the data transfer is done in the python patch
+-- 54.py
+
+-- In this file we are just creating the new data structures. The old
+-- data structures will be dropped in the python patch once all data has
+-- been transferred.
+
+-- Create the new data structures
+
+-- Table that links the analysis with the initial set of artifacts
+CREATE TABLE qiita.analysis_artifact (
+ analysis_id bigint NOT NULL,
+ artifact_id bigint NOT NULL,
+ CONSTRAINT idx_analysis_artifact_0 PRIMARY KEY (analysis_id, artifact_id)
+);
+CREATE INDEX idx_analysis_artifact_analysis ON qiita.analysis_artifact (analysis_id);
+CREATE INDEX idx_analysis_artifact_artifact ON qiita.analysis_artifact (artifact_id);
+ALTER TABLE qiita.analysis_artifact ADD CONSTRAINT fk_analysis_artifact_analysis FOREIGN KEY ( analysis_id ) REFERENCES qiita.analysis( analysis_id );
+ALTER TABLE qiita.analysis_artifact ADD CONSTRAINT fk_analysis_artifact_artifact FOREIGN KEY ( artifact_id ) REFERENCES qiita.artifact( artifact_id );
+
+-- Dropping the analysis status column because the status now depends on the
+-- statuses of the artifacts, as it does for studies.
+ALTER TABLE qiita.analysis DROP COLUMN analysis_status_id;
+
+-- Create a table to link the analysis with the jobs that create the initial
+-- artifacts
+CREATE TABLE qiita.analysis_processing_job (
+ analysis_id bigint NOT NULL,
+ processing_job_id uuid NOT NULL,
+ CONSTRAINT idx_analysis_processing_job PRIMARY KEY ( analysis_id, processing_job_id )
+ ) ;
+
+CREATE INDEX idx_analysis_processing_job_analysis ON qiita.analysis_processing_job ( analysis_id ) ;
+CREATE INDEX idx_analysis_processing_job_pj ON qiita.analysis_processing_job ( processing_job_id ) ;
+ALTER TABLE qiita.analysis_processing_job ADD CONSTRAINT fk_analysis_processing_job FOREIGN KEY ( analysis_id ) REFERENCES qiita.analysis( analysis_id ) ;
+ALTER TABLE qiita.analysis_processing_job ADD CONSTRAINT fk_analysis_processing_job_pj FOREIGN KEY ( processing_job_id ) REFERENCES qiita.processing_job( processing_job_id ) ;
+
+-- Add a logging column in the analysis
+ALTER TABLE qiita.analysis ADD logging_id bigint ;
+CREATE INDEX idx_analysis_0 ON qiita.analysis ( logging_id ) ;
+ALTER TABLE qiita.analysis ADD CONSTRAINT fk_analysis_logging FOREIGN KEY ( logging_id ) REFERENCES qiita.logging( logging_id ) ;
+
+-- Alter the software command table to differentiate between commands that
+-- apply to the analysis pipeline or commands that apply on the study
+-- processing pipeline
+ALTER TABLE qiita.software_command ADD is_analysis bool DEFAULT 'False' NOT NULL;
+
+-- We can handle some of the special cases here, so we simplify the work in the
+-- python patch
+
+-- Special case 1: there are jobs in the database that do not contain
+-- any information about the options used to run them. However, these
+-- jobs do not have any results and all are marked either as queued or
+-- error, although no error log has been saved. Since these jobs are
+-- mainly useless, we are going to remove them from the system.
+DELETE FROM qiita.analysis_job
+ WHERE job_id IN (SELECT job_id FROM qiita.job WHERE options = '{}');
+DELETE FROM qiita.job WHERE options = '{}';
+
+-- Special case 2: there are a fair number of jobs (719 last time I
+-- checked) that are not attached to any analysis. Not sure how this
+-- can happen, but these orphan jobs can't be accessed from anywhere
+-- in the interface. Remove them from the system. Note that we are
+-- unlinking the files but we are not removing them from the filepath
+-- table. We will do that in the python patch 54.py using the
+-- purge_filepaths function, as it makes sure that those files are
+-- not used anywhere else.
+DELETE FROM qiita.job_results_filepath WHERE job_id IN (
+ SELECT job_id FROM qiita.job J WHERE NOT EXISTS (
+ SELECT * FROM qiita.analysis_job AJ WHERE J.job_id = AJ.job_id));
+DELETE FROM qiita.job J WHERE NOT EXISTS (
+ SELECT * FROM qiita.analysis_job AJ WHERE J.job_id = AJ.job_id);
+
+-- In the analysis pipeline, an artifact can have multiple datatypes
+-- (e.g. procrustes). Allow this by creating a new data_type, "Multiomic"
+INSERT INTO qiita.data_type (data_type) VALUES ('Multiomic');
+
+
+-- The Validate command from BIOM will have an extra parameter, analysis
+-- Magic number 4 -> the BIOM Validate command_id; known for sure since it
+-- was added in patch 36.sql
+INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required)
+ VALUES (4, 'analysis', 'analysis', FALSE);
+-- The template parameter now becomes optional, since the artifact can be
+-- attached either to an analysis or to a prep template. The
+-- command_parameter_id is known from patch 36.sql
+UPDATE qiita.command_parameter SET required = FALSE WHERE command_parameter_id = 34;
+
+-- We are going to add a new special software type and a new software.
+-- These are used internally by Qiita to submit the private jobs, and
+-- they are needed for the analysis pipeline.
+INSERT INTO qiita.software_type (software_type, description)
+ VALUES ('private', 'Internal Qiita jobs');
+
+DO $do$
+DECLARE
+ qiita_sw_id bigint;
+ baf_cmd_id bigint;
+BEGIN
+ INSERT INTO qiita.software (name, version, description, environment_script, start_script, software_type_id, active)
+ VALUES ('Qiita', 'alpha', 'Internal Qiita jobs', 'source activate qiita', 'qiita-private-plugin', 3, True)
+ RETURNING software_id INTO qiita_sw_id;
+
+ INSERT INTO qiita.software_command (software_id, name, description)
+ VALUES (qiita_sw_id, 'build_analysis_files', 'Builds the files needed for the analysis')
+ RETURNING command_id INTO baf_cmd_id;
+
+ INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required, default_value)
+ VALUES (baf_cmd_id, 'analysis', 'analysis', True, NULL),
+ (baf_cmd_id, 'merge_dup_sample_ids', 'bool', False, 'False');
+END $do$;
+
+-- Add new filepath types
+INSERT INTO qiita.filepath_type (filepath_type) VALUES ('html_summary_dir'), ('qzv');
diff --git a/qiita_db/support_files/patches/55.sql b/qiita_db/support_files/patches/55.sql
new file mode 100644
index 000000000..65bafd459
--- /dev/null
+++ b/qiita_db/support_files/patches/55.sql
@@ -0,0 +1,4 @@
+-- Jul 6, 2017
+-- DELETE all sample/prep CONSTRAINTs (the actual work is done in python patch 55.py)
+
+SELECT 42;
diff --git a/qiita_db/support_files/patches/python_patches/54.py b/qiita_db/support_files/patches/python_patches/54.py
new file mode 100644
index 000000000..1dc576004
--- /dev/null
+++ b/qiita_db/support_files/patches/python_patches/54.py
@@ -0,0 +1,691 @@
+# The code is commented with details on the changes implemented here,
+# but here is an overview of the changes needed to transfer the analysis
+# data to the plugins structure:
+# 1) Create a new type plugin to define the diversity types
+# 2) Create the new commands on the existing QIIME plugin to execute the
+# existing analyses (beta div, taxa summaries and alpha rarefaction)
+# 3) Transfer all the data in the old structures to the plugin structures
+# 4) Delete old structures
+
+from string import ascii_letters, digits
+from random import SystemRandom
+from os.path import join, exists, basename
+from os import makedirs
+from json import loads
+
+from biom import load_table, Table
+from biom.util import biom_open
+
+from qiita_db.sql_connection import TRN
+from qiita_db.util import (get_db_files_base_dir, purge_filepaths,
+ get_mountpoint, compute_checksum)
+from qiita_db.artifact import Artifact
+
+# Create some aux functions that are going to make the code more modular
+# and easier to understand, since there is a fair amount of work to do to
+# transfer the data from the old structure to the new one
+
+
+def get_random_string(length):
+ """Creates a random string of the given length with alphanumeric chars
+
+ Parameters
+ ----------
+ length : int
+ The desired length of the string
+
+ Returns
+ -------
+ str
+ The new random string
+ """
+ sr = SystemRandom()
+ chars = ascii_letters + digits
+ return ''.join(sr.choice(chars) for i in range(length))
+
+
+def create_non_rarefied_biom_artifact(analysis, biom_data, rarefied_table):
+ """Creates the initial non-rarefied BIOM artifact of the analysis
+
+ Parameters
+ ----------
+ analysis : dict
+ Dictionary with the analysis information
+ biom_data : dict
+ Dictionary with the biom file information
+ rarefied_table : biom.Table
+ The rarefied BIOM table
+
+ Returns
+ -------
+ int
+ The id of the new artifact
+ """
+ # The non-rarefied biom artifact is the initial biom table of the analysis.
+ # This table does not currently exist anywhere, so we need to actually
+ # create the BIOM file. To create this BIOM file we need: (1) the samples
+ # and the artifacts they come from and (2) whether the samples were
+ # renamed or not. (1) is in the database, but we need to infer (2) from
+ # the existing rarefied BIOM table. Fun, fun...
+
+ with TRN:
+ # Get the samples included in the BIOM table grouped by artifact id
+ # Note that the analysis contains a BIOM table per data type included
+ # in it, and the table analysis_sample does not differentiate between
+ # datatypes, so we need to check the data type in the artifact table
+ sql = """SELECT artifact_id, array_agg(sample_id)
+ FROM qiita.analysis_sample
+ JOIN qiita.artifact USING (artifact_id)
+ WHERE analysis_id = %s AND data_type_id = %s
+ GROUP BY artifact_id"""
+ TRN.add(sql, [analysis['analysis_id'], biom_data['data_type_id']])
+ samples_by_artifact = TRN.execute_fetchindex()
+
+ # Create an empty BIOM table to be the new master table
+ new_table = Table([], [], [])
+ ids_map = {}
+ for a_id, samples in samples_by_artifact:
+ # Get the filepath of the BIOM table from the artifact
+ artifact = Artifact(a_id)
+ biom_fp = None
+ for _, fp, fp_type in artifact.filepaths:
+ if fp_type == 'biom':
+ biom_fp = fp
+ # Note that the biom table is guaranteed to exist, so there is
+ # no need to check whether biom_fp is still None
+ biom_table = load_table(biom_fp)
+ samples = set(samples).intersection(biom_table.ids())
+ biom_table.filter(samples, axis='sample', inplace=True)
+ # we need to check if the table has samples left before merging
+ if biom_table.shape[0] != 0 and biom_table.shape[1] != 0:
+ new_table = new_table.merge(biom_table)
+ ids_map.update({sid: "%d.%s" % (a_id, sid)
+ for sid in biom_table.ids()})
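+ # e.g. sample "S1" coming from artifact 4 maps to "4.S1"; this is
+ # the prefixing convention used when an analysis pools samples from
+ # multiple artifacts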
+
+ # Check if we need to rename the sample ids in the biom table
+ new_table_ids = set(new_table.ids())
+ if not new_table_ids.issuperset(rarefied_table.ids()):
+ # We need to rename the sample ids
+ new_table.update_ids(ids_map, 'sample', True, True)
+
+ sql = """INSERT INTO qiita.artifact
+ (generated_timestamp, data_type_id, visibility_id,
+ artifact_type_id, submitted_to_vamps)
+ VALUES (%s, %s, %s, %s, %s)
+ RETURNING artifact_id"""
+ # Magic number 4 -> visibility sandbox
+ # Magic number 7 -> biom artifact type
+ TRN.add(sql, [analysis['timestamp'], biom_data['data_type_id'],
+ 4, 7, False])
+ artifact_id = TRN.execute_fetchlast()
+
+ # Associate the artifact with the analysis
+ sql = """INSERT INTO qiita.analysis_artifact
+ (analysis_id, artifact_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [analysis['analysis_id'], artifact_id])
+ # Link the artifact with its file
+ dd_id, mp = get_mountpoint('BIOM')[0]
+ dir_fp = join(get_db_files_base_dir(), mp, str(artifact_id))
+ if not exists(dir_fp):
+ makedirs(dir_fp)
+ new_table_fp = join(dir_fp, "biom_table.biom")
+ with biom_open(new_table_fp, 'w') as f:
+ new_table.to_hdf5(f, "Generated by Qiita")
+
+ sql = """INSERT INTO qiita.filepath
+ (filepath, filepath_type_id, checksum,
+ checksum_algorithm_id, data_directory_id)
+ VALUES (%s, %s, %s, %s, %s)
+ RETURNING filepath_id"""
+ # Magic number 7 -> filepath_type_id = 'biom'
+ # Magic number 1 -> the checksum algorithm id
+ TRN.add(sql, [basename(new_table_fp), 7,
+ compute_checksum(new_table_fp), 1, dd_id])
+ fp_id = TRN.execute_fetchlast()
+ sql = """INSERT INTO qiita.artifact_filepath
+ (artifact_id, filepath_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [artifact_id, fp_id])
+ TRN.execute()
+
+ return artifact_id
+
+
+def create_rarefaction_job(depth, biom_artifact_id, analysis, srare_cmd_id):
+ """Create a new rarefaction job
+
+ Parameters
+ ----------
+ depth : int
+ The rarefaction depth
+ biom_artifact_id : int
+ The artifact id of the input rarefaction biom table
+ analysis : dict
+ Dictionary with the analysis information
+ srare_cmd_id : int
+ The command id of the single rarefaction command
+
+ Returns
+ -------
+ job_id : str
+ The job id
+ params : str
+ The job parameters
+ """
+ # Add the row in the processing job table
+ params = ('{"depth":%d,"subsample_multinomial":false,"biom_table":%s}'
+ % (depth, biom_artifact_id))
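+ # Note: the parameter JSON is assembled by hand to keep the exact
+ # compact formatting; a roughly equivalent construction (modulo key
+ # order) would be:
+ #     json.dumps({"depth": depth, "subsample_multinomial": False,
+ #                 "biom_table": biom_artifact_id},
+ #                separators=(',', ':'))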
+ with TRN:
+ # magic number 3: status -> success
+ sql = """INSERT INTO qiita.processing_job
+ (email, command_id, command_parameters,
+ processing_job_status_id)
+ VALUES (%s, %s, %s, %s)
+ RETURNING processing_job_id"""
+ TRN.add(sql, [analysis['email'], srare_cmd_id, params, 3])
+ job_id = TRN.execute_fetchlast()
+ # Link the job with the input artifact
+ sql = """INSERT INTO qiita.artifact_processing_job
+ (artifact_id, processing_job_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [biom_artifact_id, job_id])
+ TRN.execute()
+ return job_id, params
+
+
+def transfer_file_to_artifact(analysis_id, a_timestamp, command_id,
+ data_type_id, params, artifact_type_id,
+ filepath_id):
+ """Creates a new artifact with the given filepath id
+
+ Parameters
+ ----------
+ analysis_id : int
+ The analysis id to attach the artifact
+ a_timestamp : datetime.datetime
+ The generated timestamp of the artifact
+ command_id : int
+ The command id of the artifact
+ data_type_id : int
+ The data type id of the artifact
+ params : str
+ The parameters of the artifact
+ artifact_type_id : int
+ The artifact type
+ filepath_id : int
+ The filepath id
+
+ Returns
+ -------
+ int
+ The artifact id
+ """
+ with TRN:
+ # Add the row in the artifact table
+ # Magic number 4: Visibility -> sandbox
+ sql = """INSERT INTO qiita.artifact
+ (generated_timestamp, command_id, data_type_id,
+ command_parameters, visibility_id, artifact_type_id,
+ submitted_to_vamps)
+ VALUES (%s, %s, %s, %s, %s, %s, %s)
+ RETURNING artifact_id"""
+ TRN.add(sql, [a_timestamp, command_id, data_type_id, params, 4,
+ artifact_type_id, False])
+ artifact_id = TRN.execute_fetchlast()
+ # Link the artifact with its file
+ sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [artifact_id, filepath_id])
+ # Link the artifact with the analysis
+ sql = """INSERT INTO qiita.analysis_artifact
+ (analysis_id, artifact_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [analysis_id, artifact_id])
+
+ return artifact_id
+
+
+def create_rarefied_biom_artifact(analysis, srare_cmd_id, biom_data, params,
+ parent_biom_artifact_id, rarefaction_job_id,
+ srare_cmd_out_id):
+ """Creates the rarefied biom artifact
+
+ Parameters
+ ----------
+ analysis : dict
+ The analysis information
+ srare_cmd_id : int
+ The command id of "Single Rarefaction"
+ biom_data : dict
+ The biom information
+ params : str
+ The processing parameters
+ parent_biom_artifact_id : int
+ The parent biom artifact id
+ rarefaction_job_id : str
+ The job id of the rarefaction job
+ srare_cmd_out_id : int
+ The id of the single rarefaction output
+
+ Returns
+ -------
+ int
+ The artifact id
+ """
+ with TRN:
+ # Transfer the file to an artifact
+ # Magic number 7: artifact type -> biom
+ artifact_id = transfer_file_to_artifact(
+ analysis['analysis_id'], analysis['timestamp'], srare_cmd_id,
+ biom_data['data_type_id'], params, 7, biom_data['filepath_id'])
+ # Link the artifact with its parent
+ sql = """INSERT INTO qiita.parent_artifact (artifact_id, parent_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [artifact_id, parent_biom_artifact_id])
+ # Link the artifact as the job output
+ sql = """INSERT INTO qiita.artifact_output_processing_job
+ (artifact_id, processing_job_id, command_output_id)
+ VALUES (%s, %s, %s)"""
+ TRN.add(sql, [artifact_id, rarefaction_job_id, srare_cmd_out_id])
+ return artifact_id
+
+
+def transfer_job(analysis, command_id, params, input_artifact_id, job_data,
+ cmd_out_id, biom_data, output_artifact_type_id):
+ """Transfers the job from the old structure to the plugin structure
+
+ Parameters
+ ----------
+ analysis : dict
+ The analysis information
+ command_id : int
+ The id of the command executed
+ params : str
+ The parameters used in the job
+ input_artifact_id : int
+ The id of the input artifact
+ job_data : dict
+ The job information
+ cmd_out_id : int
+ The id of the command's output
+ biom_data : dict
+ The biom information
+ output_artifact_type_id : int
+ The type of the output artifact
+ """
+ with TRN:
+ # Create the job
+ # Add the row in the processing job table
+ # Magic number 3: status -> success
+ sql = """INSERT INTO qiita.processing_job
+ (email, command_id, command_parameters,
+ processing_job_status_id)
+ VALUES (%s, %s, %s, %s)
+ RETURNING processing_job_id"""
+ TRN.add(sql, [analysis['email'], command_id, params, 3])
+ job_id = TRN.execute_fetchlast()
+
+ # Link the job with the input artifact
+ sql = """INSERT INTO qiita.artifact_processing_job
+ (artifact_id, processing_job_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [input_artifact_id, job_id])
+
+ # Check if the executed job has results and add them
+ sql = """SELECT EXISTS(SELECT *
+ FROM qiita.job_results_filepath
+ WHERE job_id = %s)"""
+ TRN.add(sql, [job_data['job_id']])
+ if TRN.execute_fetchlast():
+ # There are results for the current job.
+ # Transfer the job files to a new artifact
+ sql = """SELECT filepath_id
+ FROM qiita.job_results_filepath
+ WHERE job_id = %s"""
+ TRN.add(sql, [job_data['job_id']])
+ filepath_id = TRN.execute_fetchlast()
+ artifact_id = transfer_file_to_artifact(
+ analysis['analysis_id'], analysis['timestamp'], command_id,
+ biom_data['data_type_id'], params, output_artifact_type_id,
+ filepath_id)
+
+ # Link the artifact with its parent
+ sql = """INSERT INTO qiita.parent_artifact (artifact_id, parent_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [artifact_id, input_artifact_id])
+ # Link the artifact as the job output
+ sql = """INSERT INTO qiita.artifact_output_processing_job
+ (artifact_id, processing_job_id, command_output_id)
+ VALUES (%s, %s, %s)"""
+ TRN.add(sql, [artifact_id, job_id, cmd_out_id])
+ TRN.execute()
+ else:
+ # There are no results for the current job, so mark it as
+ # error
+ if job_data['log_id'] is None:
+ # Magic number 2 - we are not using any other severity
+ # level, so keep using number 2
+ sql = """INSERT INTO qiita.logging (time, severity_id, msg)
+ VALUES (%s, %s, %s)
+ RETURNING logging_id"""
+ TRN.add(sql, [analysis['timestamp'], 2,
+ "Unknown error - patch 54"])
+ log_id = TRN.execute_fetchlast()
+ else:
+ log_id = job_data['log_id']
+
+ # Magic number 4 -> status -> error
+ sql = """UPDATE qiita.processing_job
+ SET processing_job_status_id = 4, logging_id = %s
+ WHERE processing_job_id = %s"""
+ TRN.add(sql, [log_id, job_id])
+
+
+# The new commands that we are going to add generate new artifact types.
+# These new artifact types are going to be added to a different plugin.
+# In the interest of time, and given that the artifact type system is going to
+# change in the near future, we feel that the easiest way to transfer
+# the current analyses results is by creating 3 different types of
+# artifacts: (1) distance matrix -> which will include the distance matrix,
+# the principal coordinates and the emperor plots; (2) rarefaction
+# curves -> which will include all the files generated by alpha rarefaction
+# and (3) taxonomy summary, which will include all the files generated
+# by summarize_taxa_through_plots.py
+
+with TRN:
+ # Add the new artifact types
+ sql = """INSERT INTO qiita.artifact_type (
+ artifact_type, description, can_be_submitted_to_ebi,
+ can_be_submitted_to_vamps)
+ VALUES (%s, %s, %s, %s)
+ RETURNING artifact_type_id"""
+ TRN.add(sql, ['beta_div_plots', 'Qiime 1 beta diversity results',
+ False, False])
+ dm_atype_id = TRN.execute_fetchlast()
+ TRN.add(sql, ['rarefaction_curves', 'Rarefaction curves', False, False])
+ rc_atype_id = TRN.execute_fetchlast()
+ TRN.add(sql, ['taxa_summary', 'Taxa summary plots', False, False])
+ ts_atype_id = TRN.execute_fetchlast()
+
+ # Associate each artifact type with the filepath types that it accepts
+ # At this time we are going to add them as directories, just as it is done
+ # right now. We can make it fancier with the new type system.
+ # Magic number 8: the filepath_type_id for the directory
+ sql = """INSERT INTO qiita.artifact_type_filepath_type
+ (artifact_type_id, filepath_type_id, required)
+ VALUES (%s, %s, %s)"""
+ sql_args = [[dm_atype_id, 8, True],
+ [rc_atype_id, 8, True],
+ [ts_atype_id, 8, True]]
+ TRN.add(sql, sql_args, many=True)
+
+ # Create the new commands that execute the current analyses. In qiita,
+ # the only commands that were available are Summarize Taxa, Beta
+ # Diversity and Alpha Rarefaction. The system was executing rarefaction
+ # by default, but it should be a separate step in the analysis process,
+ # so we are going to create a command for it too. These commands are going
+ # to be part of the QIIME plugin, so we first retrieve the id of the
+ # QIIME 1.9.1 plugin, which is guaranteed to exist because it was added
+ # in patch 33 and there is no way of removing plugins
+
+ # Step 1: Get the QIIME plugin id
+ sql = """SELECT software_id
+ FROM qiita.software
+ WHERE name = 'QIIME' AND version = '1.9.1'"""
+ TRN.add(sql)
+ qiime_id = TRN.execute_fetchlast()
+
+ # Step 2: Insert the new commands in the software_command table
+ sql = """INSERT INTO qiita.software_command
+ (software_id, name, description, is_analysis)
+ VALUES (%s, %s, %s, TRUE)
+ RETURNING command_id"""
+ TRN.add(sql, [qiime_id, 'Summarize Taxa', 'Plots taxonomy summaries at '
+ 'different taxonomy levels'])
+ sum_taxa_cmd_id = TRN.execute_fetchlast()
+ TRN.add(sql, [qiime_id, 'Beta Diversity',
+ 'Computes and plots beta diversity results'])
+ bdiv_cmd_id = TRN.execute_fetchlast()
+ TRN.add(sql, [qiime_id, 'Alpha Rarefaction',
+ 'Computes and plots alpha rarefaction results'])
+ arare_cmd_id = TRN.execute_fetchlast()
+ TRN.add(sql, [qiime_id, 'Single Rarefaction',
+ 'Rarefies the input table by random sampling without '
+ 'replacement'])
+ srare_cmd_id = TRN.execute_fetchlast()
+
+ # Step 3: Insert the parameters for each command
+ sql = """INSERT INTO qiita.command_parameter
+ (command_id, parameter_name, parameter_type, required,
+ default_value)
+ VALUES (%s, %s, %s, %s, %s)
+ RETURNING command_parameter_id"""
+ sql_args = [
+ # Summarize Taxa
+ (sum_taxa_cmd_id, 'metadata_category', 'string', False, ''),
+ (sum_taxa_cmd_id, 'sort', 'bool', False, 'False'),
+ # Beta Diversity
+ (bdiv_cmd_id, 'tree', 'string', False, ''),
+ (bdiv_cmd_id, 'metric',
+ 'choice:["abund_jaccard","binary_chisq","binary_chord",'
+ '"binary_euclidean","binary_hamming","binary_jaccard",'
+ '"binary_lennon","binary_ochiai","binary_otu_gain","binary_pearson",'
+ '"binary_sorensen_dice","bray_curtis","bray_curtis_faith",'
+ '"bray_curtis_magurran","canberra","chisq","chord","euclidean",'
+ '"gower","hellinger","kulczynski","manhattan","morisita_horn",'
+ '"pearson","soergel","spearman_approx","specprof","unifrac",'
+ '"unifrac_g","unifrac_g_full_tree","unweighted_unifrac",'
+ '"unweighted_unifrac_full_tree","weighted_normalized_unifrac",'
+ '"weighted_unifrac"]', False, '"binary_jaccard"'),
+ # Alpha rarefaction
+ (arare_cmd_id, 'tree', 'string', False, ''),
+ (arare_cmd_id, 'num_steps', 'integer', False, 10),
+ (arare_cmd_id, 'min_rare_depth', 'integer', False, 10),
+ (arare_cmd_id, 'max_rare_depth', 'integer', False, 'Default'),
+ (arare_cmd_id, 'metrics',
+ 'mchoice:["ace","berger_parker_d","brillouin_d","chao1","chao1_ci",'
+ '"dominance","doubles","enspie","equitability","esty_ci",'
+ '"fisher_alpha","gini_index","goods_coverage","heip_e",'
+ '"kempton_taylor_q","margalef","mcintosh_d","mcintosh_e",'
+ '"menhinick","michaelis_menten_fit","observed_otus",'
+ '"observed_species","osd","simpson_reciprocal","robbins",'
+ '"shannon","simpson","simpson_e","singles","strong","PD_whole_tree"]',
+ False, '["chao1","observed_otus"]'),
+ # Single rarefaction
+ (srare_cmd_id, 'depth', 'integer', True, None),
+ (srare_cmd_id, 'subsample_multinomial', 'bool', False, 'False')
+ ]
+ TRN.add(sql, sql_args, many=True)
+
+ TRN.add(sql, [sum_taxa_cmd_id, 'biom_table', 'artifact', True, None])
+ sum_taxa_cmd_param_id = TRN.execute_fetchlast()
+ TRN.add(sql, [bdiv_cmd_id, 'biom_table', 'artifact', True, None])
+ bdiv_cmd_param_id = TRN.execute_fetchlast()
+ TRN.add(sql, [arare_cmd_id, 'biom_table', 'artifact', True, None])
+ arare_cmd_param_id = TRN.execute_fetchlast()
+ TRN.add(sql, [srare_cmd_id, 'biom_table', 'artifact', True, None])
+ srare_cmd_param_id = TRN.execute_fetchlast()
+
+ # Step 4: Connect the artifact parameters with the artifact types that
+ # they accept
+ sql = """SELECT artifact_type_id
+ FROM qiita.artifact_type
+ WHERE artifact_type = 'BIOM'"""
+ TRN.add(sql)
+ biom_atype_id = TRN.execute_fetchlast()
+
+ sql = """INSERT INTO qiita.parameter_artifact_type
+ (command_parameter_id, artifact_type_id)
+ VALUES (%s, %s)"""
+ sql_args = [[sum_taxa_cmd_param_id, biom_atype_id],
+ [bdiv_cmd_param_id, biom_atype_id],
+ [arare_cmd_param_id, biom_atype_id],
+ [srare_cmd_param_id, biom_atype_id]]
+ TRN.add(sql, sql_args, many=True)
+
+ # Step 5: Add the outputs of the command.
+ sql = """INSERT INTO qiita.command_output
+ (name, command_id, artifact_type_id)
+ VALUES (%s, %s, %s)
+ RETURNING command_output_id"""
+ TRN.add(sql, ['taxa_summary', sum_taxa_cmd_id, ts_atype_id])
+ sum_taxa_cmd_out_id = TRN.execute_fetchlast()
+ TRN.add(sql, ['distance_matrix', bdiv_cmd_id, dm_atype_id])
+ bdiv_cmd_out_id = TRN.execute_fetchlast()
+ TRN.add(sql, ['rarefaction_curves', arare_cmd_id, rc_atype_id])
+ arare_cmd_out_id = TRN.execute_fetchlast()
+ TRN.add(sql, ['rarefied_table', srare_cmd_id, biom_atype_id])
+ srare_cmd_out_id = TRN.execute_fetchlast()
+
+ # Step 6: Add default parameter sets
+ sql = """INSERT INTO qiita.default_parameter_set
+ (command_id, parameter_set_name, parameter_set)
+ VALUES (%s, %s, %s)"""
+ sql_args = [
+ [sum_taxa_cmd_id, 'Defaults',
+ '{"sort": false, "metadata_category": ""}'],
+ [bdiv_cmd_id, 'Unweighted UniFrac',
+ '{"metric": "unweighted_unifrac", "tree": ""}'],
+ [arare_cmd_id, 'Defaults',
+ '{"max_rare_depth": "Default", "tree": "", "num_steps": 10, '
+ '"min_rare_depth": 10, "metrics": ["chao1", "observed_otus"]}'],
+ [srare_cmd_id, 'Defaults',
+ '{"subsample_multinomial": "False"}']]
+ TRN.add(sql, sql_args, many=True)
+
+# At this point we are ready to start transferring the data from the old
+# structures to the new structures. Overview of the procedure:
+# Step 1: Add initial set of artifacts up to rarefied table
+# Step 2: Transfer the "analysis jobs" to processing jobs and create
+# the analysis artifacts
+db_dir = get_db_files_base_dir()
+with TRN:
+ sql = "SELECT * FROM qiita.analysis"
+ TRN.add(sql)
+ analysis_info = TRN.execute_fetchindex()
+
+ # Loop through all the analyses
+ for analysis in analysis_info:
+ # Step 1: Add the initial set of artifacts. An analysis starts with
+ # a set of BIOM artifacts.
+ sql = """SELECT *
+ FROM qiita.analysis_filepath
+ JOIN qiita.filepath USING (filepath_id)
+ JOIN qiita.filepath_type USING (filepath_type_id)
+ WHERE analysis_id = %s AND filepath_type = 'biom'"""
+ TRN.add(sql, [analysis['analysis_id']])
+ analysis_bioms = TRN.execute_fetchindex()
+
+ # Loop through all the biom tables associated with the current analysis
+ # so we can create the initial set of artifacts
+ for biom_data in analysis_bioms:
+ # Get the path of the BIOM table
+ sql = """SELECT filepath, mountpoint
+ FROM qiita.filepath
+ JOIN qiita.data_directory USING (data_directory_id)
+ WHERE filepath_id = %s"""
+ TRN.add(sql, [biom_data['filepath_id']])
+ # Magic number 0: There is only a single row in the query result
+ fp_info = TRN.execute_fetchindex()[0]
+ filepath = join(db_dir, fp_info['mountpoint'], fp_info['filepath'])
+
+ # We need to check if the BIOM table has been rarefied or not
+ table = load_table(filepath)
+ depths = set(table.sum(axis='sample'))
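+ # e.g. if every sample in the table sums to exactly 1000, then
+ # depths == {1000.0} and the table is assumed to be rarefied; more
+ # than one distinct sum means it was not rarefied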
+ if len(depths) == 1:
+ # The BIOM table was rarefied
+ # Create the initial unrarefied artifact
+ initial_biom_artifact_id = create_non_rarefied_biom_artifact(
+ analysis, biom_data, table)
+ # Create the rarefaction job
+ rarefaction_job_id, params = create_rarefaction_job(
+ depths.pop(), initial_biom_artifact_id, analysis,
+ srare_cmd_id)
+ # Create the rarefied artifact
+ rarefied_biom_artifact_id = create_rarefied_biom_artifact(
+ analysis, srare_cmd_id, biom_data, params,
+ initial_biom_artifact_id, rarefaction_job_id,
+ srare_cmd_out_id)
+ else:
+ # The BIOM table was not rarefied, use current table as initial
+ initial_biom_id = transfer_file_to_artifact(
+ analysis['analysis_id'], analysis['timestamp'], None,
+ biom_data['data_type_id'], None, 7,
+ biom_data['filepath_id'])
+
+ # Loop through all the jobs that used this biom table as input
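+ # The reverse/split_part/reverse combination in the SQL below extracts
+ # the basename of the stored --otu_table_fp option, so the comparison
+ # value must also be a basename rather than the full path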
+ sql = """SELECT *
+ FROM qiita.job
+ WHERE reverse(split_part(reverse(
+ options::json->>'--otu_table_fp'), '/', 1)) = %s"""
+ TRN.add(sql, [basename(filepath)])
+ analysis_jobs = TRN.execute_fetchindex()
+ for job_data in analysis_jobs:
+ # Identify which command the current job executed
+ if job_data['command_id'] == 1:
+ # Taxa summaries
+ cmd_id = sum_taxa_cmd_id
+ params = ('{"biom_table":%d,"metadata_category":"",'
+ '"sort":false}' % initial_biom_id)
+ output_artifact_type_id = ts_atype_id
+ cmd_out_id = sum_taxa_cmd_out_id
+ elif job_data['command_id'] == 2:
+ # Beta diversity
+ cmd_id = bdiv_cmd_id
+ tree_fp = loads(job_data['options'])['--tree_fp']
+ if tree_fp:
+ params = ('{"biom_table":%d,"tree":"%s","metrics":'
+ '["unweighted_unifrac","weighted_unifrac"]}'
+ % (initial_biom_id, tree_fp))
+ else:
+ params = ('{"biom_table":%d,"metrics":["bray_curtis",'
+ '"gower","canberra","pearson"]}'
+ % initial_biom_id)
+ output_artifact_type_id = dm_atype_id
+ cmd_out_id = bdiv_cmd_out_id
+ else:
+ # Alpha rarefaction
+ cmd_id = arare_cmd_id
+ tree_fp = loads(job_data['options'])['--tree_fp']
+ params = ('{"biom_table":%d,"tree":"%s","num_steps":"10",'
+ '"min_rare_depth":"10",'
+ '"max_rare_depth":"Default"}'
+ % (initial_biom_id, tree_fp))
+ output_artifact_type_id = rc_atype_id
+ cmd_out_id = arare_cmd_out_id
+
+ transfer_job(analysis, cmd_id, params, initial_biom_id,
+ job_data, cmd_out_id, biom_data,
+ output_artifact_type_id)
+
+errors = []
+with TRN:
+ # Unlink the analysis from the biom table filepaths
+ # Magic number 7 -> biom filepath type
+ sql = """DELETE FROM qiita.analysis_filepath
+ WHERE filepath_id IN (SELECT filepath_id
+ FROM qiita.filepath
+ WHERE filepath_type_id = 7)"""
+ TRN.add(sql)
+ TRN.execute()
+
+ # Delete old structures that are not used anymore
+ tables = ["collection_job", "collection_analysis", "collection_users",
+ "collection", "collection_status", "analysis_workflow",
+ "analysis_chain", "analysis_job", "job_results_filepath", "job",
+ "job_status", "command_data_type", "command", "analysis_status"]
+ for table in tables:
+ TRN.add("DROP TABLE qiita.%s" % table)
+ try:
+ TRN.execute()
+ except Exception as e:
+ errors.append("Error deleting table %s: %s" % (table, str(e)))
+
+# Purge filepaths
+try:
+ purge_filepaths()
+except Exception as e:
+ errors.append("Error purging filepaths: %s" % str(e))
+
+if errors:
+ print "\n".join(errors)
diff --git a/qiita_db/support_files/patches/python_patches/55.py b/qiita_db/support_files/patches/python_patches/55.py
new file mode 100644
index 000000000..4dd25bb79
--- /dev/null
+++ b/qiita_db/support_files/patches/python_patches/55.py
@@ -0,0 +1,21 @@
+from qiita_db.sql_connection import TRN
+
+sql = """
+ SELECT constraint_name AS cname, 'qiita.' || table_name AS tname
+ FROM information_schema.table_constraints
+ WHERE constraint_type ='FOREIGN KEY' AND (
+ (constraint_name LIKE 'fk_sample_%' AND table_name LIKE 'sample_%') OR
+ (constraint_name LIKE 'fk_prep_%' AND table_name LIKE 'prep_%')) AND
+ table_name NOT IN (
+ 'prep_template', 'prep_template_sample', 'prep_template_filepath',
+ 'prep_template_processing_job')"""
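+# For illustration, rows returned here look like (hypothetical names)
+# ('fk_sample_1', 'qiita.sample_1') or ('fk_prep_2', 'qiita.prep_2'),
+# i.e. the constraints on the per-study dynamic sample/prep tables,
+# while the fixed prep_template* tables excluded above are preserved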
+
+with TRN:
+ TRN.add(sql)
+ to_delete = TRN.execute_fetchindex()
+
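+# Each constraint is dropped in its own transaction, so a failure on one
+# table does not roll back the drops that have already succeeded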
+for cname, tname in to_delete:
+ with TRN:
+ sql = "ALTER TABLE %s DROP CONSTRAINT %s" % (tname, cname)
+ TRN.add(sql)
+ TRN.execute()
diff --git a/qiita_db/support_files/qiita-db.dbs b/qiita_db/support_files/qiita-db.dbs
index 1f7ebe2f5..84ac07efc 100644
--- a/qiita_db/support_files/qiita-db.dbs
+++ b/qiita_db/support_files/qiita-db.dbs
@@ -14,7 +14,6 @@