diff --git a/.travis.yml b/.travis.yml
index 8e4c86827..5a191929f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,9 +40,10 @@ script:
- qiita-env start_cluster qiita-general
- qiita-env make --no-load-ontologies
- if [ ${TEST_ADD_STUDIES} == "True" ]; then test_data_studies/commands.sh ; fi
+ - if [ ${TEST_ADD_STUDIES} == "True" ]; then qiita-cron-job ; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then qiita-test-install ; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage -v --cover-package=qiita_db,qiita_pet,qiita_core,qiita_ware; fi
- - flake8 qiita_* setup.py scripts/qiita scripts/qiita-env scripts/qiita-test-install
+ - flake8 qiita_* setup.py scripts/*
- ls -R /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_pet/support_files/doc/
- qiita pet webserver
addons:
diff --git a/qiita_core/support_files/server.crt b/qiita_core/support_files/server.crt
index 0a56752a6..361fb497e 100644
--- a/qiita_core/support_files/server.crt
+++ b/qiita_core/support_files/server.crt
@@ -1,15 +1,15 @@
-----BEGIN CERTIFICATE-----
-MIICQzCCAawCCQDD7K/frIbu8DANBgkqhkiG9w0BAQUFADBmMQswCQYDVQQGEwJV
+MIICRTCCAa4CCQDPGmrQ4bra7TANBgkqhkiG9w0BAQUFADBmMQswCQYDVQQGEwJV
UzELMAkGA1UECBMCQ0ExEjAQBgNVBAcTCVNhbiBEaWVnbzENMAsGA1UEChMEVUNT
-RDETMBEGA1UECxMKS25pZ2h0IExhYjESMBAGA1UEAxMJbG9jYWxob3N0MB4XDTE2
-MTIxOTE2MDA1NFoXDTE3MDExODE2MDA1NFowZjELMAkGA1UEBhMCVVMxCzAJBgNV
-BAgTAkNBMRIwEAYDVQQHEwlTYW4gRGllZ28xDTALBgNVBAoTBFVDU0QxEzARBgNV
-BAsTCktuaWdodCBMYWIxEjAQBgNVBAMTCWxvY2FsaG9zdDCBnzANBgkqhkiG9w0B
-AQEFAAOBjQAwgYkCgYEAq6ChN/5vk1fn45Ys5inttHe8IntBQtU31oKy+2IR+znT
-GBvG/iht0veG5sbjlkm+Hn4auk5lR9EOmnTy+fl44LJ81rZuYmy3mjLSAHwmx7ee
-ZTJ2lNjH/Blq5vC4VmPQ3Ka7zMusOTZSBDw6k8r6bxbMgarXc+rQtDvQfv2QITsC
-AwEAATANBgkqhkiG9w0BAQUFAAOBgQBBir71K7HdTbU7129ZYLDyeXJfAjzCsSxj
-evSqa6PJuh5PODdPyO01Hyxb5J/aHzmE5FRZKMLdgOTlqCpQjyMMvVc6UJzX5bZo
-x6Y5gvoTNeCfaD0N6eZxxd7BqFGq+gmqk5U1cyKf+QjIhu/Q4p/Ga+Cx9b3t/Sk+
-/iUPu/otBw==
+RDETMBEGA1UECxMKS25pZ2h0IExhYjESMBAGA1UEAxMJbG9jYWxob3N0MCAXDTE3
+MDExOTA4MTQ1NloYDzIxMTYxMjI2MDgxNDU2WjBmMQswCQYDVQQGEwJVUzELMAkG
+A1UECBMCQ0ExEjAQBgNVBAcTCVNhbiBEaWVnbzENMAsGA1UEChMEVUNTRDETMBEG
+A1UECxMKS25pZ2h0IExhYjESMBAGA1UEAxMJbG9jYWxob3N0MIGfMA0GCSqGSIb3
+DQEBAQUAA4GNADCBiQKBgQCroKE3/m+TV+fjlizmKe20d7wie0FC1TfWgrL7YhH7
+OdMYG8b+KG3S94bmxuOWSb4efhq6TmVH0Q6adPL5+XjgsnzWtm5ibLeaMtIAfCbH
+t55lMnaU2Mf8GWrm8LhWY9DcprvMy6w5NlIEPDqTyvpvFsyBqtdz6tC0O9B+/ZAh
+OwIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAGLau2DhrdnR5P2C2rGZuSaHLYCsVPJO
+nj3Q+v5md1UzTDitlzHwM3pX1QBLxfiTJ6e7/0QLkrDceYKOfU/eucLGM1KG1YjS
+nB39W2BNLKXu4QXWJUx4WC1Qxib9wbxxm4NyMb0ir2/PZTs+gKMtguBUyVHqETvs
+n1b0mapYTJ/Q
-----END CERTIFICATE-----
diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py
index fbd5129da..3acbe2d11 100644
--- a/qiita_db/artifact.py
+++ b/qiita_db/artifact.py
@@ -420,6 +420,15 @@ def _associate_with_analysis(instance, analysis_id):
sql_args = [(instance.id, p.id) for p in parents]
qdb.sql_connection.TRN.add(sql, sql_args, many=True)
+ # inheriting visibility
+ visibilities = {a.visibility for a in instance.parents}
+ # set based on the "lowest" visibility
+ if 'sandbox' in visibilities:
+ instance.visibility = 'sandbox'
+ elif 'private' in visibilities:
+ instance.visibility = 'private'
+ else:
+ instance.visibility = 'public'
elif prep_template:
# This artifact is uploaded by the user in the
# processing pipeline
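
For reference, the visibility inheritance added above reduces to picking the most restrictive level present among the parents. A minimal self-contained sketch of that rule (the ranking list and function name are illustrative, not Qiita API):

```python
# most to least restrictive; mirrors the if/elif chain in the hunk above
VISIBILITY_RANK = ['sandbox', 'private', 'public']

def inherited_visibility(parent_visibilities):
    """Return the most restrictive visibility among the parents."""
    for level in VISIBILITY_RANK:
        if level in parent_visibilities:
            return level
    return 'public'

assert inherited_visibility({'public', 'sandbox'}) == 'sandbox'
assert inherited_visibility({'public', 'private'}) == 'private'
assert inherited_visibility({'public'}) == 'public'
```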
diff --git a/qiita_db/meta_util.py b/qiita_db/meta_util.py
index 1f8956db5..33a84de37 100644
--- a/qiita_db/meta_util.py
+++ b/qiita_db/meta_util.py
@@ -13,7 +13,6 @@
..autosummary::
:toctree: generated/
- get_accessible_filepath_ids
get_lat_longs
"""
# -----------------------------------------------------------------------------
@@ -25,7 +24,16 @@
# -----------------------------------------------------------------------------
from __future__ import division
-from itertools import chain
+from moi import r_client
+from os import stat
+from time import strftime, localtime
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+from base64 import b64encode
+from urllib import quote
+from StringIO import StringIO
+from future.utils import viewitems
+from datetime import datetime
from qiita_core.qiita_settings import qiita_config
import qiita_db as qdb
@@ -50,85 +58,232 @@ def _get_data_fpids(constructor, object_id):
return {fpid for fpid, _, _ in obj.get_filepaths()}
-def get_accessible_filepath_ids(user):
- """Gets all filepaths that this user should have access to
-
- This gets all raw, preprocessed, and processed filepaths, for studies
- that the user has access to, as well as all the mapping files and biom
- tables associated with the analyses that the user has access to.
+def validate_filepath_access_by_user(user, filepath_id):
+ """Validates if the user has access to the filepath_id
Parameters
----------
user : User object
The user we are interested in
-
+ filepath_id : int
+ The filepath id
Returns
-------
- set
- A set of filepath ids
+ bool
+ Whether the user has access to the filepath_id
Notes
-----
- Admins have access to all files, so all filepath ids are returned for
- admins
+ Admins have access to all files, so True is always returned
"""
- with qdb.sql_connection.TRN:
+ TRN = qdb.sql_connection.TRN
+ with TRN:
if user.level == "admin":
# admins have access all files
- qdb.sql_connection.TRN.add(
- "SELECT filepath_id FROM qiita.filepath")
- return set(qdb.sql_connection.TRN.execute_fetchflatten())
-
- # First, the studies
- # There are private and shared studies
- studies = user.user_studies | user.shared_studies
-
- filepath_ids = set()
- for study in studies:
- # Add the sample template files
- if study.sample_template:
- filepath_ids.update(
- {fid for fid, _ in study.sample_template.get_filepaths()})
-
- # Add the prep template filepaths
- for pt in study.prep_templates():
- filepath_ids.update({fid for fid, _ in pt.get_filepaths()})
-
- # Add the artifact filepaths
- for artifact in study.artifacts():
- filepath_ids.update({fid for fid, _, _ in artifact.filepaths})
-
- # Next, the public artifacts
- for artifact in qdb.artifact.Artifact.iter_public():
- # Add the filepaths of the artifact
- filepath_ids.update({fid for fid, _, _ in artifact.filepaths})
-
- # Then add the filepaths of the prep templates
- for pt in artifact.prep_templates:
- filepath_ids.update({fid for fid, _ in pt.get_filepaths()})
-
- # Then add the filepaths of the sample template
- study = artifact.study
- if study:
- filepath_ids.update(
- {fid
- for fid, _ in study.sample_template.get_filepaths()})
-
- # Next, analyses
- # Same as before, there are public, private, and shared
- analyses = qdb.analysis.Analysis.get_by_status('public') | \
- user.private_analyses | user.shared_analyses
-
- if analyses:
- sql = """SELECT filepath_id
- FROM qiita.analysis_filepath
- WHERE analysis_id IN %s"""
- sql_args = tuple([a.id for a in analyses])
- qdb.sql_connection.TRN.add(sql, [sql_args])
- filepath_ids.update(qdb.sql_connection.TRN.execute_fetchflatten())
-
- return filepath_ids
+ return True
+
+ sql = """SELECT
+ (SELECT array_agg(artifact_id)
+ FROM qiita.artifact_filepath
+ WHERE filepath_id = {0}) AS artifact,
+ (SELECT array_agg(study_id)
+ FROM qiita.sample_template_filepath
+ WHERE filepath_id = {0}) AS sample_info,
+ (SELECT array_agg(prep_template_id)
+ FROM qiita.prep_template_filepath
+ WHERE filepath_id = {0}) AS prep_info,
+ (SELECT array_agg(analysis_id)
+ FROM qiita.analysis_filepath
+ WHERE filepath_id = {0}) AS analysis""".format(filepath_id)
+ TRN.add(sql)
+
+ arid, sid, pid, anid = TRN.execute_fetchflatten()
+
+ # artifacts
+ if arid:
+ # [0] because there should only be one
+ artifact = qdb.artifact.Artifact(arid[0])
+ if artifact.visibility == 'public':
+ return True
+ else:
+ study = artifact.study
+ if study:
+ # the artifact belongs to a study; defer to its access check
+ return artifact.study.has_access(user)
+ else:
+ analysis = artifact.analysis
+ return analysis in (
+ user.private_analyses | user.shared_analyses)
+ # sample info files
+ elif sid:
+ # the visibility of the sample info file is given by the
+ # study visibility
+ # [0] because there should only be one
+ return qdb.study.Study(sid[0]).has_access(user)
+ # prep info files
+ elif pid:
+ # prep access is given by its artifacts: if the user has
+ # access to any of them, they also have access to the prep
+ # [0] because there should only be one
+ a = qdb.metadata_template.prep_template.PrepTemplate(
+ pid[0]).artifact
+ if (a.visibility == 'public' or a.study.has_access(user)):
+ return True
+ else:
+ for c in a.descendants.nodes():
+ if (c.visibility == 'public' or c.study.has_access(user)):
+ return True
+ return False
+ # analyses
+ elif anid:
+ # [0] because there should only be one
+ aid = anid[0]
+ analysis = qdb.analysis.Analysis(aid)
+ return analysis in (
+ user.private_analyses | user.shared_analyses)
+ return False
+
+
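
A hypothetical caller of the new per-filepath check, replacing the old pattern of precomputing every accessible filepath id up front; the account email and filepath id below are made-up values:

```python
import qiita_db as qdb
from qiita_db.meta_util import validate_filepath_access_by_user

user = qdb.user.User('demo@microbio.me')  # assumed existing account
fp_id = 42                                # illustrative filepath id
if not validate_filepath_access_by_user(user, fp_id):
    raise RuntimeError('filepath %d is not accessible to this user' % fp_id)
```
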
+def update_redis_stats():
+ """Generate the system stats and save them in redis
+
+ Returns
+ -------
+ list of str
+ artifact filepaths that are not present in the file system
+ """
+ STUDY = qdb.study.Study
+ studies = {'public': STUDY.get_by_status('public'),
+ 'private': STUDY.get_by_status('private'),
+ 'sandbox': STUDY.get_by_status('sandbox')}
+ number_studies = {k: len(v) for k, v in viewitems(studies)}
+
+ number_of_samples = {}
+ ebi_samples_prep = {}
+ num_samples_ebi = 0
+ for k, sts in viewitems(studies):
+ number_of_samples[k] = 0
+ for s in sts:
+ st = s.sample_template
+ if st is not None:
+ number_of_samples[k] += len(list(st.keys()))
+
+ ebi_samples_prep_count = 0
+ for pt in s.prep_templates():
+ ebi_samples_prep_count += len([
+ 1 for _, v in viewitems(pt.ebi_experiment_accessions)
+ if v is not None and v != ''])
+ ebi_samples_prep[s.id] = ebi_samples_prep_count
+
+ if st is not None:
+ num_samples_ebi += len([
+ 1 for _, v in viewitems(
+ st.ebi_sample_accessions)
+ if v is not None and v != ''])
+
+ num_users = qdb.util.get_count('qiita.qiita_user')
+
+ lat_longs = get_lat_longs()
+
+ num_studies_ebi = len(ebi_samples_prep)
+ number_samples_ebi_prep = sum([v for _, v in viewitems(ebi_samples_prep)])
+
+ # generating file size stats
+ stats = []
+ missing_files = []
+ for k, sts in viewitems(studies):
+ for s in sts:
+ for a in s.artifacts():
+ for _, fp, dt in a.filepaths:
+ try:
+ # don't reuse `s`, the study loop variable, for the stat result
+ finfo = stat(fp)
+ stats.append((dt, finfo.st_size, strftime('%Y-%m',
+ localtime(finfo.st_ctime))))
+ except OSError:
+ missing_files.append(fp)
+
+ summary = {}
+ all_dates = []
+ for ft, size, ym in stats:
+ if ft not in summary:
+ summary[ft] = {}
+ if ym not in summary[ft]:
+ summary[ft][ym] = 0
+ all_dates.append(ym)
+ summary[ft][ym] += size
+ all_dates = sorted(set(all_dates))
+
+ # sorting summaries
+ rm_from_data = ['html_summary', 'tgz', 'directory', 'raw_fasta', 'log',
+ 'biom', 'raw_sff', 'raw_qual']
+ ordered_summary = {}
+ for dt in summary:
+ if dt in rm_from_data:
+ continue
+ new_list = []
+ current_value = 0
+ for ad in all_dates:
+ if ad in summary[dt]:
+ current_value += summary[dt][ad]
+ new_list.append(current_value)
+ ordered_summary[dt] = new_list
+
+ plot_order = sorted([(k, ordered_summary[k][-1]) for k in ordered_summary],
+ key=lambda x: x[1])
+
+ # helper function to generate y axis, modified from:
+ # http://stackoverflow.com/a/1094933
+ def sizeof_fmt(value, position):
+ number = None
+ for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
+ if abs(value) < 1024.0:
+ number = "%3.1f%s" % (value, unit)
+ break
+ value /= 1024.0
+ if number is None:
+ number = "%.1f%s" % (value, 'Yi')
+ return number
+
+ all_dates_axis = range(len(all_dates))
+ plt.figure(figsize=(20, 10))
+ # set locator params after creating the figure so they apply to its axes
+ plt.locator_params(axis='y', nbins=10)
+ for k, v in plot_order:
+ plt.plot(all_dates_axis, ordered_summary[k], linewidth=2, label=k)
+
+ plt.xticks(all_dates_axis, all_dates)
+ plt.legend()
+ plt.grid()
+ ax = plt.gca()
+ ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt))
+ plt.xlabel('Date')
+ plt.ylabel('Storage space per data type')
+
+ plot = StringIO()
+ plt.savefig(plot, format='png')
+ plot.seek(0)
+ img = 'data:image/png;base64,' + quote(b64encode(plot.getvalue()))
+
+ time = datetime.now().strftime('%m-%d-%y %H:%M:%S')
+
+ portal = qiita_config.portal
+ vals = [
+ ('number_studies', number_studies, r_client.hmset),
+ ('number_of_samples', number_of_samples, r_client.hmset),
+ ('num_users', num_users, r_client.set),
+ ('lat_longs', lat_longs, r_client.set),
+ ('num_studies_ebi', num_studies_ebi, r_client.set),
+ ('num_samples_ebi', num_samples_ebi, r_client.set),
+ ('number_samples_ebi_prep', number_samples_ebi_prep, r_client.set),
+ ('img', img, r_client.set),
+ ('time', time, r_client.set)]
+ for k, v, f in vals:
+ redis_key = '%s:stats:%s' % (portal, k)
+ # delete the key first; a leftover value of a different type would break the write
+ r_client.delete(redis_key)
+ f(redis_key, v)
+
+ return missing_files
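
The values written above land in redis under '<portal>:stats:<name>'. A hedged read-back sketch, assuming the same moi r_client used for the writes (hashes were stored with hmset, scalars with set):

```python
from moi import r_client
from qiita_core.qiita_settings import qiita_config

portal = qiita_config.portal
num_users = r_client.get('%s:stats:num_users' % portal)
number_studies = r_client.hgetall('%s:stats:number_studies' % portal)
# e.g. num_users == '42'; number_studies == {'public': '10', ...}
```
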
def get_lat_longs():
@@ -143,6 +298,7 @@ def get_lat_longs():
s.id for s in qdb.portal.Portal(qiita_config.portal).get_studies()]
with qdb.sql_connection.TRN:
+ # getting all tables in the portal
sql = """SELECT DISTINCT table_name
FROM information_schema.columns
WHERE table_name SIMILAR TO 'sample_[0-9]+'
@@ -151,16 +307,14 @@ def get_lat_longs():
AND SPLIT_PART(table_name, '_', 2)::int IN %s;"""
qdb.sql_connection.TRN.add(sql, [tuple(portal_table_ids)])
- sql = """SELECT CAST(latitude AS FLOAT), CAST(longitude AS FLOAT)
- FROM qiita.{0}
- WHERE isnumeric(latitude) AND isnumeric(latitude)"""
- idx = qdb.sql_connection.TRN.index
-
- portal_tables = qdb.sql_connection.TRN.execute_fetchflatten()
-
- ebi_null = tuple(qdb.metadata_template.constants.EBI_NULL_VALUES)
- for table in portal_tables:
- qdb.sql_connection.TRN.add(sql.format(table), [ebi_null, ebi_null])
-
- return list(
- chain.from_iterable(qdb.sql_connection.TRN.execute()[idx:]))
+ sql = [('SELECT CAST(latitude AS FLOAT), '
+ ' CAST(longitude AS FLOAT) '
+ 'FROM qiita.%s '
+ 'WHERE isnumeric(latitude) AND isnumeric(longitude) '
+ "AND latitude <> 'NaN' "
+ "AND longitude <> 'NaN' " % s)
+ for s in qdb.sql_connection.TRN.execute_fetchflatten()]
+ sql = ' UNION '.join(sql)
+ qdb.sql_connection.TRN.add(sql)
+
+ return qdb.sql_connection.TRN.execute_fetchindex()
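
The rewrite above collapses the old per-table round trips into a single UNION query. A sketch of the SQL it generates, shown for two hypothetical sample tables:

```python
tables = ['sample_1', 'sample_2']  # illustrative table names
selects = [
    ("SELECT CAST(latitude AS FLOAT), CAST(longitude AS FLOAT) "
     "FROM qiita.%s "
     "WHERE isnumeric(latitude) AND isnumeric(longitude) "
     "AND latitude <> 'NaN' AND longitude <> 'NaN'" % t)
    for t in tables]
sql = ' UNION '.join(selects)  # fetched with one execute_fetchindex() call
```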
diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py
index fc9f564de..ad43be487 100644
--- a/qiita_db/metadata_template/test/test_util.py
+++ b/qiita_db/metadata_template/test/test_util.py
@@ -359,7 +359,8 @@ def test_get_pgsql_reserved_words(self):
"has_physical_specimen\thost_subject_id\tint_column\tlatitude\tlongitude\t"
"physical_location\trequired_sample_info_status\tsample_type\t"
"str_column\n"
- "2.Sample1 \t05/29/2014 12:24:51\tTest Sample 1\tTrue\tTrue\t"
+ "2.Sample1 \t05/29/2014 12:24:51\tTest Sample 1\t"
+ '"True\t"\t"\nTrue"\t'
"NotIdentified\t1\t42.42\t41.41\tlocation1\treceived\ttype1\t"
"Value for sample 1\n"
"2.Sample2 \t05/29/2014 12:24:51\t"
diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py
index dddcdcf45..ce2b520da 100644
--- a/qiita_db/metadata_template/util.py
+++ b/qiita_db/metadata_template/util.py
@@ -60,16 +60,13 @@ def prefix_sample_names_with_id(md_template, study_id):
md_template.index.name = None
-def load_template_to_dataframe(fn, strip_whitespace=True, index='sample_name'):
+def load_template_to_dataframe(fn, index='sample_name'):
"""Load a sample/prep template or a QIIME mapping file into a data frame
Parameters
----------
fn : str or file-like object
filename of the template to load, or an already open template file
- strip_whitespace : bool, optional
- Defaults to True. Whether or not to strip whitespace from values in the
- input file
index : str, optional
Defaults to 'sample_name'. The index to use in the loaded information
@@ -110,19 +107,6 @@ def load_template_to_dataframe(fn, strip_whitespace=True, index='sample_name'):
if not holdfile:
raise ValueError('Empty file passed!')
- # Strip all values in the cells in the input file, if requested
- if strip_whitespace:
- for pos, line in enumerate(holdfile):
- holdfile[pos] = '\t'.join(d.strip(" \r\x0b\x0c")
- for d in line.split('\t'))
-
- # get and clean the controlled columns
- cols = holdfile[0].split('\t')
- controlled_cols = {'sample_name'}
- controlled_cols.update(qdb.metadata_template.constants.CONTROLLED_COLS)
- holdfile[0] = '\t'.join(c.lower() if c.lower() in controlled_cols else c
- for c in cols)
-
if index == "#SampleID":
# We're going to parse a QIIME mapping file. We are going to first
# parse it with the QIIME function so we can remove the comments
@@ -133,11 +117,29 @@ def load_template_to_dataframe(fn, strip_whitespace=True, index='sample_name'):
# The QIIME parser fixes the index and removes the #
index = 'SampleID'
- # Check that we don't have duplicate columns
- col_names = [c.lower() for c in holdfile[0].strip().split('\t')]
- if len(set(col_names)) != len(col_names):
- raise qdb.exceptions.QiitaDBDuplicateHeaderError(
- find_duplicates(col_names))
+ # Strip all values in the cells in the input file
+ for pos, line in enumerate(holdfile):
+ cols = line.split('\t')
+ if pos == 0 and index != 'SampleID':
+ # get and clean the controlled columns
+ ccols = {'sample_name'}
+ ccols.update(qdb.metadata_template.constants.CONTROLLED_COLS)
+ newcols = [
+ c.lower().strip() if c.lower().strip() in ccols
+ else c.strip()
+ for c in cols]
+
+ # while we are here, let's check for duplicate columns headers
+ if len(set(newcols)) != len(newcols):
+ raise qdb.exceptions.QiitaDBDuplicateHeaderError(
+ find_duplicates(newcols))
+ else:
+ # .strip() removes stray characters, newlines, tabs and
+ # trailing spaces, but each line still needs its newline
+ # terminator (hence the + '\n' below)
+ newcols = [d.strip(" \r\x0b\x0c\n") for d in cols]
+
+ holdfile[pos] = '\t'.join(newcols) + '\n'
# index_col:
# is set as False, otherwise it is cast as a float and we want a string
@@ -158,6 +160,9 @@ def load_template_to_dataframe(fn, strip_whitespace=True, index='sample_name'):
index_col=False,
comment='\t',
converters={index: lambda x: str(x).strip()})
+ # remove tabs, newlines and other control whitespace from fields
+ template.replace(to_replace='[\t\n\r\x0b\x0c]+', value='',
+ regex=True, inplace=True)
except UnicodeDecodeError:
# Find row number and col number for utf-8 encoding errors
headers = holdfile[0].strip().split('\t')
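
A small pandas demonstration (illustrative data, not from the test suite) of the post-parse cleanup added in this hunk; embedded tabs, newlines and other vertical whitespace are stripped from cell values:

```python
import pandas as pd

df = pd.DataFrame({'sample_name': ['S1'], 'notes': ['line\none\tline two']})
df.replace(to_replace='[\t\n\r\x0b\x0c]+', value='', regex=True, inplace=True)
print(df.loc[0, 'notes'])  # -> 'lineoneline two'
```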
diff --git a/qiita_db/study.py b/qiita_db/study.py
index 03e6ef607..ef21feddc 100644
--- a/qiita_db/study.py
+++ b/qiita_db/study.py
@@ -464,6 +464,42 @@ def delete(cls, id_):
qdb.sql_connection.TRN.execute()
+ @classmethod
+ def get_tags(cls):
+ """Returns the available study tags
+
+ Returns
+ -------
+ list of DictCursor
+ Table-like structure of metadata, one tag per row. Can be
+ accessed as a list of dictionaries, keyed on column name.
+ """
+ with qdb.sql_connection.TRN:
+ sql = """SELECT study_tag_id, study_tag
+ FROM qiita.study_tags"""
+
+ qdb.sql_connection.TRN.add(sql)
+ return qdb.sql_connection.TRN.execute_fetchindex()
+
+ @classmethod
+ def insert_tags(cls, user, tags):
+ """Insert available study tags
+
+ Parameters
+ ----------
+ user : qiita_db.user.User
+ The user adding the tags
+ tags : list of str
+ The list of tags to add
+ """
+ with qdb.sql_connection.TRN:
+ sql = """INSERT INTO qiita.study_tags (email, study_tag)
+ VALUES (%s, %s)"""
+ sql_args = [[user.email, tag] for tag in tags]
+
+ qdb.sql_connection.TRN.add(sql, sql_args, many=True)
+ qdb.sql_connection.TRN.execute()
+
# --- Attributes ---
@property
@@ -921,7 +957,52 @@ def ebi_submission_status(self, value):
ebi_submission_status.__doc__.format(', '.join(_VALID_EBI_STATUS))
- # --- methods ---
+ @property
+ def tags(self):
+ """Returns the tags of the study
+
+ Returns
+ -------
+ list of str
+ The study tags
+ """
+ with qdb.sql_connection.TRN:
+ sql = """SELECT study_tag_id, study_tag
+ FROM qiita.study_tags
+ LEFT JOIN qiita.per_study_tags USING (study_tag_id)
+ WHERE study_id = {0}""".format(self._id)
+ qdb.sql_connection.TRN.add(sql)
+ return qdb.sql_connection.TRN.execute_fetchindex()
+
+ @tags.setter
+ def tags(self, tag_ids):
+ """Sets the tags of the study
+
+ Parameters
+ ----------
+ tag_ids : list of int
+ The tag ids of the study
+ """
+ with qdb.sql_connection.TRN:
+ sql = """DELETE FROM qiita.per_study_tags WHERE study_id = %s"""
+ qdb.sql_connection.TRN.add(sql, [self._id])
+
+ if tag_ids:
+ sql = """INSERT INTO qiita.per_study_tags
+ (study_tag_id, study_id)
+ SELECT %s, %s
+ WHERE
+ NOT EXISTS (
+ SELECT study_tag_id, study_id
+ FROM qiita.per_study_tags
+ WHERE study_tag_id = %s AND study_id = %s
+ )"""
+ sql_args = [[tid, self._id, tid, self._id] for tid in tag_ids]
+ qdb.sql_connection.TRN.add(sql, sql_args, many=True)
+
+ qdb.sql_connection.TRN.execute()
+
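
A hedged usage sketch of the tagging API added above; the email, tag names and study id are illustrative and assumed to already exist in the database:

```python
import qiita_db as qdb

user = qdb.user.User('test@foo.bar')
qdb.study.Study.insert_tags(user, ['16S', 'soil'])

# tags are attached to a study by id, through the setter
tag_ids = [t['study_tag_id'] for t in qdb.study.Study.get_tags()]
study = qdb.study.Study(1)
study.tags = tag_ids  # replaces any previously assigned tags
```
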
+    # --- methods ---
def artifacts(self, dtype=None, artifact_type=None):
"""Returns the list of artifacts associated with the study
diff --git a/qiita_db/support_files/patches/47.sql b/qiita_db/support_files/patches/47.sql
index 13b1fbccb..077bb6690 100644
--- a/qiita_db/support_files/patches/47.sql
+++ b/qiita_db/support_files/patches/47.sql
@@ -1,115 +1,5 @@
--- Jan 5, 2017
--- Move the analysis to the plugin system. This is a major rewrite of the
--- database backend that supports the analysis pipeline.
--- After exploring the data on the database, we realized that
--- there are a lot of inconsistencies in the data. Unfortunately, this
--- makes the process of transferring the data from the old structure
--- to the new one a bit more challenging, as we will need to handle
--- different special cases. Furthermore, all the information needed is not
--- present in the database, since it requires checking BIOM files. Due to these
--- reason, the vast majority of the data transfer is done in the python patch
--- 47.py
+-- Jan 15, 2017
+-- Propagate the study's status to all of its artifacts.
+-- This is much easier to do in python; see the matching python patch (47.py)
--- In this file we are just creating the new data structures. The old
--- datastructure will be dropped in the python patch once all data has been
--- transferred.
-
--- Create the new data structures
-
--- Table that links the analysis with the initial set of artifacts
-CREATE TABLE qiita.analysis_artifact (
- analysis_id bigint NOT NULL,
- artifact_id bigint NOT NULL,
- CONSTRAINT idx_analysis_artifact_0 PRIMARY KEY (analysis_id, artifact_id)
-);
-CREATE INDEX idx_analysis_artifact_analysis ON qiita.analysis_artifact (analysis_id);
-CREATE INDEX idx_analysis_artifact_artifact ON qiita.analysis_artifact (artifact_id);
-ALTER TABLE qiita.analysis_artifact ADD CONSTRAINT fk_analysis_artifact_analysis FOREIGN KEY ( analysis_id ) REFERENCES qiita.analysis( analysis_id );
-ALTER TABLE qiita.analysis_artifact ADD CONSTRAINT fk_analysis_artifact_artifact FOREIGN KEY ( artifact_id ) REFERENCES qiita.artifact( artifact_id );
-
--- Droping the analysis status column cause now it depends on the artifacts
--- status, like the study does.
-ALTER TABLE qiita.analysis DROP COLUMN analysis_status_id;
-
--- Create a table to link the analysis with the jobs that create the initial
--- artifacts
-CREATE TABLE qiita.analysis_processing_job (
- analysis_id bigint NOT NULL,
- processing_job_id uuid NOT NULL,
- CONSTRAINT idx_analysis_processing_job PRIMARY KEY ( analysis_id, processing_job_id )
- ) ;
-
-CREATE INDEX idx_analysis_processing_job_analysis ON qiita.analysis_processing_job ( analysis_id ) ;
-CREATE INDEX idx_analysis_processing_job_pj ON qiita.analysis_processing_job ( processing_job_id ) ;
-ALTER TABLE qiita.analysis_processing_job ADD CONSTRAINT fk_analysis_processing_job FOREIGN KEY ( analysis_id ) REFERENCES qiita.analysis( analysis_id ) ;
-ALTER TABLE qiita.analysis_processing_job ADD CONSTRAINT fk_analysis_processing_job_pj FOREIGN KEY ( processing_job_id ) REFERENCES qiita.processing_job( processing_job_id ) ;
-
--- Add a logging column in the analysis
-ALTER TABLE qiita.analysis ADD logging_id bigint ;
-CREATE INDEX idx_analysis_0 ON qiita.analysis ( logging_id ) ;
-ALTER TABLE qiita.analysis ADD CONSTRAINT fk_analysis_logging FOREIGN KEY ( logging_id ) REFERENCES qiita.logging( logging_id ) ;
-
--- We can handle some of the special cases here, so we simplify the work in the
--- python patch
-
--- Special case 1: there are jobs in the database that do not contain
--- any information about the options used to process those parameters.
--- However, these jobs do not have any results and all are marked either
--- as queued or error, although no error log has been saved. Since these
--- jobs are mainly useleess, we are going to remove them from the system
-DELETE FROM qiita.analysis_job
- WHERE job_id IN (SELECT job_id FROM qiita.job WHERE options = '{}');
-DELETE FROM qiita.job WHERE options = '{}';
-
--- Special case 2: there are a fair amount of jobs (719 last time I
--- checked) that are not attached to any analysis. Not sure how this
--- can happen, but these orphan jobs can't be accessed from anywhere
--- in the interface. Remove them from the system. Note that we are
--- unlinking the files but we are not removing them from the filepath
--- table. We will do that on the patch 47.py using the
--- purge_filepaths function, as it will make sure that those files are
--- not used anywhere else
-DELETE FROM qiita.job_results_filepath WHERE job_id IN (
- SELECT job_id FROM qiita.job J WHERE NOT EXISTS (
- SELECT * FROM qiita.analysis_job AJ WHERE J.job_id = AJ.job_id));
-DELETE FROM qiita.job J WHERE NOT EXISTS (
- SELECT * FROM qiita.analysis_job AJ WHERE J.job_id = AJ.job_id);
-
--- In the analysis pipeline, an artifact can have mutliple datatypes
--- (e.g. procrustes). Allow this by creating a new data_type being "multiomic"
-INSERT INTO qiita.data_type (data_type) VALUES ('Multiomic');
-
-
--- The valdiate command from BIOM will have an extra parameter, analysis
--- Magic number -> 4 BIOM command_id -> known for sure since it was added in
--- patch 36.sql
-INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required)
- VALUES (4, 'analysis', 'analysis', FALSE);
--- The template comand now becomes optional, since it can be added either to
--- an analysis or to a prep template. command_parameter_id known from patch
--- 36.sql
-UPDATE qiita.command_parameter SET required = FALSE WHERE command_parameter_id = 34;
-
--- We are going to add a new special software type, and a new software.
--- This is going to be used internally by Qiita, so submit the private jobs.
--- This is needed for the analysis.
-INSERT INTO qiita.software_type (software_type, description)
- VALUES ('private', 'Internal Qiita jobs');
-
-DO $do$
-DECLARE
- qiita_sw_id bigint;
- baf_cmd_id bigint;
-BEGIN
- INSERT INTO qiita.software (name, version, description, environment_script, start_script, software_type_id, active)
- VALUES ('Qiita', 'alpha', 'Internal Qiita jobs', 'source activate qiita', 'qiita-private-2', 3, True)
- RETURNING software_id INTO qiita_sw_id;
-
- INSERT INTO qiita.software_command (software_id, name, description)
- VALUES (qiita_sw_id, 'build_analysis_files', 'Builds the files needed for the analysis')
- RETURNING command_id INTO baf_cmd_id;
-
- INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required, default_value)
- VALUES (baf_cmd_id, 'analysis', 'analysis', True, NULL),
- (baf_cmd_id, 'merge_dup_sample_ids', 'bool', False, 'False');
-END $do$
+SELECT 1;
diff --git a/qiita_db/support_files/patches/48.sql b/qiita_db/support_files/patches/48.sql
new file mode 100644
index 000000000..f18e28868
--- /dev/null
+++ b/qiita_db/support_files/patches/48.sql
@@ -0,0 +1,4 @@
+-- Jan 20, 2017
+-- see py file
+
+SELECT 1;
diff --git a/qiita_db/support_files/patches/49.sql b/qiita_db/support_files/patches/49.sql
new file mode 100644
index 000000000..4b2b3c42a
--- /dev/null
+++ b/qiita_db/support_files/patches/49.sql
@@ -0,0 +1,6 @@
+-- Jan 27, 2017
+-- sequeneces -> sequences
+
+UPDATE qiita.artifact_type SET description = 'Demultiplexed and QC sequences'
+ WHERE artifact_type = 'Demultiplexed'
+ AND description = 'Demultiplexed and QC sequeneces';
diff --git a/qiita_db/support_files/patches/50.sql b/qiita_db/support_files/patches/50.sql
new file mode 100644
index 000000000..f732ef7b5
--- /dev/null
+++ b/qiita_db/support_files/patches/50.sql
@@ -0,0 +1,19 @@
+-- Feb 3, 2017
+-- adding study tagging system
+
+CREATE TABLE qiita.study_tags (
+ study_tag_id bigserial NOT NULL,
+ email varchar NOT NULL,
+ study_tag varchar NOT NULL,
+ CONSTRAINT pk_study_tag UNIQUE ( study_tag ),
+ CONSTRAINT pk_study_tag_id PRIMARY KEY ( study_tag_id )
+) ;
+
+CREATE INDEX idx_study_tag_id ON qiita.study_tags ( study_tag_id ) ;
+ALTER TABLE qiita.study_tags ADD CONSTRAINT fk_study_tags FOREIGN KEY ( email ) REFERENCES qiita.qiita_user( email );
+
+CREATE TABLE qiita.per_study_tags (
+ study_tag_id bigint NOT NULL,
+ study_id bigint NOT NULL,
+ CONSTRAINT pk_per_study_tags PRIMARY KEY ( study_tag_id, study_id )
+) ;
diff --git a/qiita_db/support_files/patches/51.sql b/qiita_db/support_files/patches/51.sql
new file mode 100644
index 000000000..a484d5c24
--- /dev/null
+++ b/qiita_db/support_files/patches/51.sql
@@ -0,0 +1,115 @@
+-- Jan 5, 2017
+-- Move the analysis to the plugin system. This is a major rewrite of the
+-- database backend that supports the analysis pipeline.
+-- After exploring the data on the database, we realized that
+-- there are a lot of inconsistencies in the data. Unfortunately, this
+-- makes the process of transferring the data from the old structure
+-- to the new one a bit more challenging, as we will need to handle
+-- different special cases. Furthermore, all the information needed is not
+-- present in the database, since it requires checking BIOM files. Due to these
+-- reason, the vast majority of the data transfer is done in the python patch
+-- 51.py
+
+-- In this file we are just creating the new data structures. The old
+-- datastructure will be dropped in the python patch once all data has been
+-- transferred.
+
+-- Create the new data structures
+
+-- Table that links the analysis with the initial set of artifacts
+CREATE TABLE qiita.analysis_artifact (
+ analysis_id bigint NOT NULL,
+ artifact_id bigint NOT NULL,
+ CONSTRAINT idx_analysis_artifact_0 PRIMARY KEY (analysis_id, artifact_id)
+);
+CREATE INDEX idx_analysis_artifact_analysis ON qiita.analysis_artifact (analysis_id);
+CREATE INDEX idx_analysis_artifact_artifact ON qiita.analysis_artifact (artifact_id);
+ALTER TABLE qiita.analysis_artifact ADD CONSTRAINT fk_analysis_artifact_analysis FOREIGN KEY ( analysis_id ) REFERENCES qiita.analysis( analysis_id );
+ALTER TABLE qiita.analysis_artifact ADD CONSTRAINT fk_analysis_artifact_artifact FOREIGN KEY ( artifact_id ) REFERENCES qiita.artifact( artifact_id );
+
+-- Dropping the analysis status column because now it depends on the artifacts
+-- status, like the study does.
+ALTER TABLE qiita.analysis DROP COLUMN analysis_status_id;
+
+-- Create a table to link the analysis with the jobs that create the initial
+-- artifacts
+CREATE TABLE qiita.analysis_processing_job (
+ analysis_id bigint NOT NULL,
+ processing_job_id uuid NOT NULL,
+ CONSTRAINT idx_analysis_processing_job PRIMARY KEY ( analysis_id, processing_job_id )
+ ) ;
+
+CREATE INDEX idx_analysis_processing_job_analysis ON qiita.analysis_processing_job ( analysis_id ) ;
+CREATE INDEX idx_analysis_processing_job_pj ON qiita.analysis_processing_job ( processing_job_id ) ;
+ALTER TABLE qiita.analysis_processing_job ADD CONSTRAINT fk_analysis_processing_job FOREIGN KEY ( analysis_id ) REFERENCES qiita.analysis( analysis_id ) ;
+ALTER TABLE qiita.analysis_processing_job ADD CONSTRAINT fk_analysis_processing_job_pj FOREIGN KEY ( processing_job_id ) REFERENCES qiita.processing_job( processing_job_id ) ;
+
+-- Add a logging column in the analysis
+ALTER TABLE qiita.analysis ADD logging_id bigint ;
+CREATE INDEX idx_analysis_0 ON qiita.analysis ( logging_id ) ;
+ALTER TABLE qiita.analysis ADD CONSTRAINT fk_analysis_logging FOREIGN KEY ( logging_id ) REFERENCES qiita.logging( logging_id ) ;
+
+-- We can handle some of the special cases here, so we simplify the work in the
+-- python patch
+
+-- Special case 1: there are jobs in the database that do not contain
+-- any information about the options used to process those parameters.
+-- However, these jobs do not have any results and all are marked either
+-- as queued or error, although no error log has been saved. Since these
+-- jobs are mainly useless, we are going to remove them from the system
+DELETE FROM qiita.analysis_job
+ WHERE job_id IN (SELECT job_id FROM qiita.job WHERE options = '{}');
+DELETE FROM qiita.job WHERE options = '{}';
+
+-- Special case 2: there are a fair amount of jobs (719 last time I
+-- checked) that are not attached to any analysis. Not sure how this
+-- can happen, but these orphan jobs can't be accessed from anywhere
+-- in the interface. Remove them from the system. Note that we are
+-- unlinking the files but we are not removing them from the filepath
+-- table. We will do that on the patch 51.py using the
+-- purge_filepaths function, as it will make sure that those files are
+-- not used anywhere else
+DELETE FROM qiita.job_results_filepath WHERE job_id IN (
+ SELECT job_id FROM qiita.job J WHERE NOT EXISTS (
+ SELECT * FROM qiita.analysis_job AJ WHERE J.job_id = AJ.job_id));
+DELETE FROM qiita.job J WHERE NOT EXISTS (
+ SELECT * FROM qiita.analysis_job AJ WHERE J.job_id = AJ.job_id);
+
+-- In the analysis pipeline, an artifact can have multiple datatypes
+-- (e.g. procrustes). Allow this by creating a new data_type, "Multiomic"
+INSERT INTO qiita.data_type (data_type) VALUES ('Multiomic');
+
+
+-- The validate command from BIOM will have an extra parameter, analysis
+-- Magic number -> 4 BIOM command_id -> known for sure since it was added in
+-- patch 36.sql
+INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required)
+ VALUES (4, 'analysis', 'analysis', FALSE);
+-- The template command now becomes optional, since it can be added either to
+-- an analysis or to a prep template. command_parameter_id known from patch
+-- 36.sql
+UPDATE qiita.command_parameter SET required = FALSE WHERE command_parameter_id = 34;
+
+-- We are going to add a new special software type, and a new software.
+-- This is going to be used internally by Qiita to submit the private jobs.
+-- This is needed for the analysis.
+INSERT INTO qiita.software_type (software_type, description)
+ VALUES ('private', 'Internal Qiita jobs');
+
+DO $do$
+DECLARE
+ qiita_sw_id bigint;
+ baf_cmd_id bigint;
+BEGIN
+ INSERT INTO qiita.software (name, version, description, environment_script, start_script, software_type_id, active)
+ VALUES ('Qiita', 'alpha', 'Internal Qiita jobs', 'source activate qiita', 'qiita-private-2', 3, True)
+ RETURNING software_id INTO qiita_sw_id;
+
+ INSERT INTO qiita.software_command (software_id, name, description)
+ VALUES (qiita_sw_id, 'build_analysis_files', 'Builds the files needed for the analysis')
+ RETURNING command_id INTO baf_cmd_id;
+
+ INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required, default_value)
+ VALUES (baf_cmd_id, 'analysis', 'analysis', True, NULL),
+ (baf_cmd_id, 'merge_dup_sample_ids', 'bool', False, 'False');
+END $do$
diff --git a/qiita_db/support_files/patches/python_patches/47.py b/qiita_db/support_files/patches/python_patches/47.py
index 43f1b65a9..a325a7a2e 100644
--- a/qiita_db/support_files/patches/python_patches/47.py
+++ b/qiita_db/support_files/patches/python_patches/47.py
@@ -1,688 +1,30 @@
-# The code is commented with details on the changes implemented here,
-# but here is an overview of the changes needed to transfer the analysis
-# data to the plugins structure:
-# 1) Create a new type plugin to define the diversity types
-# 2) Create the new commands on the existing QIIME plugin to execute the
-# existing analyses (beta div, taxa summaries and alpha rarefaction)
-# 3) Transfer all the data in the old structures to the plugin structures
-# 4) Delete old structures
-
-from os.path import join, exists, basename
-from os import makedirs
-from json import loads
-
-from biom import load_table, Table
-from biom.util import biom_open
-
-from qiita_db.sql_connection import TRN
-from qiita_db.util import (get_db_files_base_dir, purge_filepaths,
- get_mountpoint, compute_checksum)
-from qiita_db.artifact import Artifact
-
-# Create some aux functions that are going to make the code more modular
-# and easier to understand, since there is a fair amount of work to do to
-# trasnfer the data from the old structure to the new one
-
-
-def create_non_rarefied_biom_artifact(analysis, biom_data, rarefied_table):
- """Creates the initial non-rarefied BIOM artifact of the analysis
-
- Parameters
- ----------
- analysis : dict
- Dictionary with the analysis information
- biom_data : dict
- Dictionary with the biom file information
- rarefied_table : biom.Table
- The rarefied BIOM table
-
- Returns
- -------
- int
- The id of the new artifact
- """
- # The non rarefied biom artifact is the initial biom table of the analysis.
- # This table does not currently exist anywhere, so we need to actually
- # create the BIOM file. To create this BIOM file we need: (1) the samples
- # and artifacts they come from and (2) whether the samples where
- # renamed or not. (1) is on the database, but we need to inferr (2) from
- # the existing rarefied BIOM table. Fun, fun...
-
- with TRN:
- # Get the samples included in the BIOM table grouped by artifact id
- # Note that the analysis contains a BIOM table per data type included
- # in it, and the table analysis_sample does not differentiate between
- # datatypes, so we need to check the data type in the artifact table
- sql = """SELECT artifact_id, array_agg(sample_id)
- FROM qiita.analysis_sample
- JOIN qiita.artifact USING (artifact_id)
- WHERE analysis_id = %s AND data_type_id = %s
- GROUP BY artifact_id"""
- TRN.add(sql, [analysis['analysis_id'], biom_data['data_type_id']])
- samples_by_artifact = TRN.execute_fetchindex()
-
- # Create an empty BIOM table to be the new master table
- new_table = Table([], [], [])
- ids_map = {}
- for a_id, samples in samples_by_artifact:
- # Get the filepath of the BIOM table from the artifact
- artifact = Artifact(a_id)
- biom_fp = None
- for _, fp, fp_type in artifact.filepaths:
- if fp_type == 'biom':
- biom_fp = fp
- # Note that we are sure that the biom table exists for sure, so
- # no need to check if biom_fp is undefined
- biom_table = load_table(biom_fp)
- biom_table.filter(samples, axis='sample', inplace=True)
- new_table = new_table.merge(biom_table)
- ids_map.update({sid: "%d.%s" % (a_id, sid)
- for sid in biom_table.ids()})
-
- # Check if we need to rename the sample ids in the biom table
- new_table_ids = set(new_table.ids())
- if not new_table_ids.issuperset(rarefied_table.ids()):
- # We need to rename the sample ids
- new_table.update_ids(ids_map, 'sample', True, True)
-
- sql = """INSERT INTO qiita.artifact
- (generated_timestamp, data_type_id, visibility_id,
- artifact_type_id, submitted_to_vamps)
- VALUES (%s, %s, %s, %s, %s)
- RETURNING artifact_id"""
- # Magic number 4 -> visibility sandbox
- # Magix number 7 -> biom artifact type
- TRN.add(sql, [analysis['timestamp'], biom_data['data_type_id'],
- 4, 7, False])
- artifact_id = TRN.execute_fetchlast()
- # Associate the artifact with the analysis
- sql = """INSERT INTO qiita.analysis_artifact
- (analysis_id, artifact_id)
- VALUES (%s, %s)"""
- TRN.add(sql, [analysis['analysis_id'], artifact_id])
- # Link the artifact with its file
- dd_id, mp = get_mountpoint('BIOM')[0]
- dir_fp = join(get_db_files_base_dir(), mp, str(artifact_id))
- if not exists(dir_fp):
- makedirs(dir_fp)
- new_table_fp = join(dir_fp, "biom_table.biom")
- with biom_open(new_table_fp, 'w') as f:
- new_table.to_hdf5(f, "Generated by Qiita")
-
- sql = """INSERT INTO qiita.filepath
- (filepath, filepath_type_id, checksum,
- checksum_algorithm_id, data_directory_id)
- VALUES (%s, %s, %s, %s, %s)
- RETURNING filepath_id"""
- # Magic number 7 -> filepath_type_id = 'biom'
- # Magic number 1 -> the checksum algorithm id
- TRN.add(sql, [basename(new_table_fp), 7,
- compute_checksum(new_table_fp), 1, dd_id])
- fp_id = TRN.execute_fetchlast()
- sql = """INSERT INTO qiita.artifact_filepath
- (artifact_id, filepath_id)
- VALUES (%s, %s)"""
- TRN.add(sql, [artifact_id, fp_id])
- TRN.execute()
-
- return artifact_id
-
-
-def create_rarefaction_job(depth, biom_artifact_id, analysis, srare_cmd_id):
- """Create a new rarefaction job
-
- Parameters
- ----------
- depth : int
- The rarefaction depth
- biom_artifact_id : int
- The artifact id of the input rarefaction biom table
- analysis : dict
- Dictionary with the analysis information
- srare_cmd_id : int
- The command id of the single rarefaction command
-
- Returns
- -------
- job_id : str
- The job id
- params : str
- The job parameters
- """
- # Add the row in the procesisng job table
- params = ('{"depth":%d,"subsample_multinomial":false,"biom_table":%s}'
- % (depth, biom_artifact_id))
- with TRN:
- # magic number 3: status -> success
- sql = """INSERT INTO qiita.processing_job
- (email, command_id, command_parameters,
- processing_job_status_id)
- VALUES (%s, %s, %s, %s)
- RETURNING processing_job_id"""
- TRN.add(sql, [analysis['email'], srare_cmd_id, params, 3])
- job_id = TRN.execute_fetchlast()
- # Step 1.2.b: Link the job with the input artifact
- sql = """INSERT INTO qiita.artifact_processing_job
- (artifact_id, processing_job_id)
- VALUES (%s, %s)"""
- TRN.add(sql, [biom_artifact_id, job_id])
- TRN.execute()
- return job_id, params
-
-
-def transfer_file_to_artifact(analysis_id, a_timestamp, command_id,
- data_type_id, params, artifact_type_id,
- filepath_id):
- """Creates a new artifact with the given filepath id
-
- Parameters
- ----------
- analysis_id : int
- The analysis id to attach the artifact
- a_timestamp : datetime.datetime
- The generated timestamp of the artifact
- command_id : int
- The command id of the artifact
- data_type_id : int
- The data type id of the artifact
- params : str
- The parameters of the artifact
- artifact_type_id : int
- The artifact type
- filepath_id : int
- The filepath id
-
- Returns
- -------
- int
- The artifact id
- """
- with TRN:
- # Add the row in the artifact table
- # Magic number 4: Visibility -> sandbox
- sql = """INSERT INTO qiita.artifact
- (generated_timestamp, command_id, data_type_id,
- command_parameters, visibility_id, artifact_type_id,
- submitted_to_vamps)
- VALUES (%s, %s, %s, %s, %s, %s, %s)
- RETURNING artifact_id"""
- TRN.add(sql, [a_timestamp, command_id, data_type_id, params, 4,
- artifact_type_id, False])
- artifact_id = TRN.execute_fetchlast()
- # Link the artifact with its file
- sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id)
- VALUES (%s, %s)"""
- TRN.add(sql, [artifact_id, filepath_id])
- # Link the artifact with the analysis
- sql = """INSERT INTO qiita.analysis_artifact
- (analysis_id, artifact_id)
- VALUES (%s, %s)"""
- TRN.add(sql, [analysis_id, artifact_id])
-
- return artifact_id
-
-
-def create_rarefied_biom_artifact(analysis, srare_cmd_id, biom_data, params,
- parent_biom_artifact_id, rarefaction_job_id,
- srare_cmd_out_id):
- """Creates the rarefied biom artifact
-
- Parameters
- ----------
- analysis : dict
- The analysis information
- srare_cmd_id : int
- The command id of "Single Rarefaction"
- biom_data : dict
- The biom information
- params : str
- The processing parameters
- parent_biom_artifact_id : int
- The parent biom artifact id
- rarefaction_job_id : str
- The job id of the rarefaction job
- srare_cmd_out_id : int
- The id of the single rarefaction output
-
- Returns
- -------
- int
- The artifact id
- """
- with TRN:
- # Transfer the file to an artifact
- # Magic number 7: artifact type -> biom
- artifact_id = transfer_file_to_artifact(
- analysis['analysis_id'], analysis['timestamp'], srare_cmd_id,
- biom_data['data_type_id'], params, 7, biom_data['filepath_id'])
- # Link the artifact with its parent
- sql = """INSERT INTO qiita.parent_artifact (artifact_id, parent_id)
- VALUES (%s, %s)"""
- TRN.add(sql, [artifact_id, parent_biom_artifact_id])
- # Link the artifact as the job output
- sql = """INSERT INTO qiita.artifact_output_processing_job
- (artifact_id, processing_job_id, command_output_id)
- VALUES (%s, %s, %s)"""
- TRN.add(sql, [artifact_id, rarefaction_job_id, srare_cmd_out_id])
- return artifact_id
-
-
-def transfer_job(analysis, command_id, params, input_artifact_id, job_data,
- cmd_out_id, biom_data, output_artifact_type_id):
- """Transfers the job from the old structure to the plugin structure
-
- Parameters
- ----------
- analysis : dict
- The analysis information
- command_id : int
- The id of the command executed
- params : str
- The parameters used in the job
- input_artifact_id : int
- The id of the input artifact
- job_data : dict
- The job information
- cmd_out_id : int
- The id of the command's output
- biom_data : dict
- The biom information
- output_artifact_type_id : int
- The type of the output artifact
- """
- with TRN:
- # Create the job
- # Add the row in the processing job table
- # Magic number 3: status -> success
- sql = """INSERT INTO qiita.processing_job
- (email, command_id, command_parameters,
- processing_job_status_id)
- VALUES (%s, %s, %s, %s)
- RETURNING processing_job_id"""
- TRN.add(sql, [analysis['email'], command_id, params, 3])
- job_id = TRN.execute_fetchlast()
-
- # Link the job with the input artifact
- sql = """INSERT INTO qiita.artifact_processing_job
- (artifact_id, processing_job_id)
- VALUES (rarefied_biom_id, proc_job_id)"""
- TRN.add(sql, [input_artifact_id, job_id])
-
- # Check if the executed job has results and add them
- sql = """SELECT EXISTS(SELECT *
- FROM qiita.job_results_filepath
- WHERE job_id = %s)"""
- TRN.add(sql, [job_data['job_id']])
- if TRN.execute_fetchlast():
- # There are results for the current job.
- # Transfer the job files to a new artifact
- sql = """SELECT filepath_id
- FROM qiita.job_results_filepath
- WHERE job_id = %s"""
- TRN.add(sql, job_data['job_id'])
- filepath_id = TRN.execute_fetchlast()
- artifact_id = transfer_file_to_artifact(
- analysis['analysis_id'], analysis['timestamp'], command_id,
- biom_data['data_type_id'], params, output_artifact_type_id,
- filepath_id)
-
- # Link the artifact with its parent
- sql = """INSERT INTO qiita.parent_artifact (artifact_id, parent_id)
- VALUES (%s, %s)"""
- TRN.add(sql, [artifact_id, input_artifact_id])
- # Link the artifact as the job output
- sql = """INSERT INTO qiita.artifact_output_processing_job
- (artifact_id, processing_job_id, command_output_id)
- VALUES (%s, %s, %s)"""
- TRN.add(sql, [artifact_id, job_id, cmd_out_id])
- TRN.exeucte()
- else:
- # There are no results on the current job, so mark it as
- # error
- if job_data.log_id is None:
- # Magic number 2 - we are not using any other severity
- # level, so keep using number 2
- sql = """INSERT INTO qiita.logging (time, severity_id, msg)
- VALUES (%s, %s, %s)
- RETURNING logging_id"""
- TRN.add(sql, [analysis['timestamp'], 2,
- "Unknown error - patch 47"])
- else:
- log_id = job_data['log_id']
-
- # Magic number 4 -> status -> error
- sql = """UPDATE qiita.processing_job
- SET processing_job_status_id = 4, logging_id = %s
- WHERE processing_job_id = %s"""
- TRN.add(sql, [log_id, job_id])
-
-
-# The new commands that we are going to add generate new artifact types.
-# These new artifact types are going to be added to a different plugin.
-# In interest of time and given that the artifact type system is going to
-# change in the near future, we feel that the easiest way to transfer
-# the current analyses results is by creating 3 different types of
-# artifacts: (1) distance matrix -> which will include the distance matrix,
-# the principal coordinates and the emperor plots; (2) rarefaction
-# curves -> which will include all the files generated by alpha rarefaction
-# and (3) taxonomy summary, which will include all the files generated
-# by summarize_taxa_through_plots.py
-
-# Step 1: Create the new type
-with TRN:
- # Magic number 2 -> The "artifact definition" software type
- sql = """INSERT INTO qiita.software
- (name, version, description, environment_script, start_script,
- software_type_id)
- VALUES ('Diversity types', '0.1.0',
- 'Diversity artifacts type plugin',
- 'source activate qiita', 'start_diversity_types', 2)
- RETURNING software_id"""
- TRN.add(sql)
- divtype_id = TRN.execute_fetchlast()
-
- # Step 2: Create the validate and HTML generator commands
- sql = """INSERT INTO qiita.software_command (software_id, name, description)
- VALUES (%s, %s, %s)
- RETURNING command_id"""
- TRN.add(sql, [divtype_id, 'Validate',
- 'Validates a new artifact of the given diversity type'])
- validate_cmd_id = TRN.execute_fetchlast()
- TRN.add(sql, [divtype_id, 'Generate HTML summary',
- 'Generates the HTML summary of a given diversity type'])
- html_summary_cmd_id = TRN.execute_fetchlast()
-
- # Step 3: Add the parameters for the previous commands
- sql = """INSERT INTO qiita.command_parameter
- (command_id, parameter_name, parameter_type, required)
- VALUES (%s, %s, %s, %s)"""
- sql_args = [(validate_cmd_id, 'files', 'string', True),
- (validate_cmd_id, 'artifact_type', 'string', True),
- (html_summary_cmd_id, 'input_data', 'artifact', True)]
- TRN.add(sql, sql_args, many=True)
-
- # Step 4: Add the new artifact types
- sql = """INSERT INTO qiita.artifact_type (
- artifact_type, description, can_be_submitted_to_ebi,
- can_be_submitted_to_vamps)
- VALUES (%s, %s, %s, %s)
- RETURNING artifact_type_id"""
- TRN.add(sql, ['distance_matrix', 'Distance matrix holding pairwise '
- 'distance between samples', False, False])
- dm_atype_id = TRN.execute_fetchlast()
- TRN.add(sql, ['rarefaction_curves', 'Rarefaction curves', False, False])
- rc_atype_id = TRN.execute_fetchlast()
- TRN.add(sql, ['taxa_summary', 'Taxa summary plots', False, False])
- ts_atype_id = TRN.execute_fetchlast()
-
- # Step 5: Associate each artifact with the filetypes that it accepts
- # At this time we are going to add them as directories, just as it is done
- # right now. We can make it fancier with the new type system.
- # Magic number 8: the filepath_type_id for the directory
- sql = """INSERT INTO qiita.artifact_type_filepath_type
- (artifact_type_id, filepath_type_id, required)
- VALUES (%s, %s, %s)"""
- sql_args = [[dm_atype_id, 8, True],
- [rc_atype_id, 8, True],
- [ts_atype_id, 8, True]]
- TRN.add(sql, sql_args, many=True)
-
- # Step 6: Associate the plugin with the types that it defines
- sql = """INSERT INTO qiita.software_artifact_type
- (software_id, artifact_type_id)
- VALUES (%s, %s)"""
- sql_args = [[divtype_id, dm_atype_id],
- [divtype_id, rc_atype_id],
- [divtype_id, ts_atype_id]]
- TRN.add(sql, sql_args, many=True)
-
- # Step 7: Create the new entries for the data directory
- sql = """INSERT INTO qiita.data_directory
- (data_type, mountpoint, subdirectory, active)
- VALUES (%s, %s, %s, %s)"""
- sql_args = [['distance_matrix', 'distance_matrix', True, True],
- ['rarefaction_curves', 'rarefaction_curves', True, True],
- ['taxa_summary', 'taxa_summary', True, True]]
- TRN.add(sql, sql_args, many=True)
-
- # Create the new commands that execute the current analyses. In qiita,
- # the only commands that where available are Summarize Taxa, Beta
- # Diversity and Alpha Rarefaction. The system was executing rarefaction
- # by default, but it should be a different step in the analysis process
- # so we are going to create a command for it too. These commands are going
- # to be part of the QIIME plugin, so we are going to first retrieve the
- # id of the QIIME 1.9.1 plugin, which for sure exists cause it was added
- # in patch 33 and there is no way of removing plugins
-
- # Step 1: Get the QIIME plugin id
- sql = """SELECT software_id
- FROM qiita.software
- WHERE name = 'QIIME' AND version = '1.9.1'"""
- TRN.add(sql)
- qiime_id = TRN.execute_fetchlast()
-
- # Step 2: Insert the new commands in the software_command table
- sql = """INSERT INTO qiita.software_command (software_id, name, description)
- VALUES (%s, %s, %s)
- RETURNING command_id"""
- TRN.add(sql, [qiime_id, 'Summarize Taxa', 'Plots taxonomy summaries at '
- 'different taxonomy levels'])
- sum_taxa_cmd_id = TRN.execute_fetchlast()
- TRN.add(sql, [qiime_id, 'Beta Diversity',
- 'Computes and plots beta diversity results'])
- bdiv_cmd_id = TRN.execute_fetchlast()
- TRN.add(sql, [qiime_id, 'Alpha Rarefaction',
- 'Computes and plots alpha rarefaction results'])
- arare_cmd_id = TRN.execute_fetchlast()
- TRN.add(sql, [qiime_id, 'Single Rarefaction',
- 'Rarefies the input table by random sampling without '
- 'replacement'])
- srare_cmd_id = TRN.execute_fetchlast()
-
- # Step 3: Insert the parameters for each command
- sql = """INSERT INTO qiita.command_parameter
- (command_id, parameter_name, parameter_type, required,
- default_value)
- VALUES (%s, %s, %s, %s, %s)
- RETURNING command_parameter_id"""
- sql_args = [
- # Summarize Taxa
- (sum_taxa_cmd_id, 'metadata_category', 'string', False, ''),
- (sum_taxa_cmd_id, 'sort', 'bool', False, 'False'),
- # Beta Diversity
- (bdiv_cmd_id, 'tree', 'string', False, ''),
- (bdiv_cmd_id, 'metrics',
- 'mchoice:["abund_jaccard","binary_chisq","binary_chord",'
- '"binary_euclidean","binary_hamming","binary_jaccard",'
- '"binary_lennon","binary_ochiai","binary_otu_gain","binary_pearson",'
- '"binary_sorensen_dice","bray_curtis","bray_curtis_faith",'
- '"bray_curtis_magurran","canberra","chisq","chord","euclidean",'
- '"gower","hellinger","kulczynski","manhattan","morisita_horn",'
- '"pearson","soergel","spearman_approx","specprof","unifrac",'
- '"unifrac_g","unifrac_g_full_tree","unweighted_unifrac",'
- '"unweighted_unifrac_full_tree","weighted_normalized_unifrac",'
- '"weighted_unifrac"]', False, '["binary_jaccard","bray_curtis"]'),
- # Alpha rarefaction
- (arare_cmd_id, 'tree', 'string', False, ''),
- (arare_cmd_id, 'num_steps', 'integer', False, 10),
- (arare_cmd_id, 'min_rare_depth', 'integer', False, 10),
- (arare_cmd_id, 'max_rare_depth', 'integer', False, 'Default'),
- # Single rarefaction
- (srare_cmd_id, 'depth', 'integer', True, None),
- (srare_cmd_id, 'subsample_multinomial', 'bool', False, 'False')
- ]
- TRN.add(sql, sql_args, many=True)
-
- TRN.add(sql, [sum_taxa_cmd_id, 'biom_table', 'artifact', True, None])
- sum_taxa_cmd_param_id = TRN.execute_fetchlast()
- TRN.add(sql, [bdiv_cmd_id, 'biom_table', 'artifact', True, None])
- bdiv_cmd_param_id = TRN.execute_fetchlast()
- TRN.add(sql, [arare_cmd_id, 'biom_table', 'artifact', True, None])
- arare_cmd_param_id = TRN.execute_fetchlast()
- TRN.add(sql, [srare_cmd_id, 'biom_table', 'artifact', True, None])
- srare_cmd_param_id = TRN.execute_fetchlast()
-
- # Step 4: Connect the artifact parameters with the artifact types that
- # they accept
- sql = """SELECT artifact_type_id
- FROM qiita.artifact_type
- WHERE artifact_type = 'BIOM'"""
- TRN.add(sql)
- biom_atype_id = TRN.execute_fetchlast()
-
- sql = """INSERT INTO qiita.parameter_artifact_type
- (command_parameter_id, artifact_type_id)
- VALUES (%s, %s)"""
- sql_args = [[sum_taxa_cmd_param_id, biom_atype_id],
- [bdiv_cmd_param_id, biom_atype_id],
- [arare_cmd_param_id, biom_atype_id],
- [srare_cmd_param_id, biom_atype_id]]
- TRN.add(sql, sql_args, many=True)
-
- # Step 5: Add the outputs of the command.
- sql = """INSERT INTO qiita.command_output
- (name, command_id, artifact_type_id)
- VALUES (%s, %s, %s)
- RETURNING command_output_id"""
- TRN.add(sql, ['taxa_summary', sum_taxa_cmd_id, ts_atype_id])
- sum_taxa_cmd_out_id = TRN.execute_fetchlast()
- TRN.add(sql, ['distance_matrix', bdiv_cmd_id, dm_atype_id])
- bdiv_cmd_out_id = TRN.execute_fetchlast()
- TRN.add(sql, ['rarefaction_curves', arare_cmd_id, rc_atype_id])
- arare_cmd_out_id = TRN.execute_fetchlast()
- TRN.add(sql, ['rarefied_table', srare_cmd_id, biom_atype_id])
- srare_cmd_out_id = TRN.execute_fetchlast()
-
-# At this point we are ready to start transferring the data from the old
-# structures to the new structures. Overview of the procedure:
-# Step 1: Add initial set of artifacts up to rarefied table
-# Step 2: Transfer the "analisys jobs" to processing jobs and create
-# the analysis artifacts
-db_dir = get_db_files_base_dir()
-with TRN:
- sql = "SELECT * FROM qiita.analysis"
- TRN.add(sql)
- analysis_info = TRN.execute_fetchindex()
-
- # Loop through all the analysis
- for analysis in analysis_info:
- # Step 1: Add the inital set of artifacts. An analysis starts with
- # a set of BIOM artifacts.
- sql = """SELECT *
- FROM qiita.analysis_filepath
- JOIN qiita.filepath USING (filepath_id)
- JOIN qiita.filepath_type USING (filepath_type_id)
- WHERE analysis_id = %s AND filepath_type = 'biom'"""
- TRN.add(sql, [analysis['analysis_id']])
- analysis_bioms = TRN.execute_fetchindex()
-
- # Loop through all the biom tables associated with the current analysis
- # so we can create the initial set of artifacts
- for biom_data in analysis_bioms:
- # Get the path of the BIOM table
- sql = """SELECT filepath, mountpoint
- FROM qiita.filepath
- JOIN qiita.data_directory USING (data_directory_id)
- WHERE filepath_id = %s"""
- TRN.add(sql, [biom_data['filepath_id']])
- # Magic number 0: There is only a single row in the query result
- fp_info = TRN.execute_fetchindex()[0]
- filepath = join(db_dir, fp_info['mountpoint'], fp_info['filepath'])
-
- # We need to check if the BIOM table has been rarefied or not
- table = load_table(filepath)
- depths = set(table.sum(axis='sample'))
- if len(depths) == 1:
- # The BIOM table was rarefied
- # Create the initial unrarefied artifact
- initial_biom_artifact_id = create_non_rarefied_biom_artifact(
- analysis, biom_data, table)
- # Create the rarefaction job
- rarefaction_job_id, params = create_rarefaction_job(
- depths.pop(), initial_biom_artifact_id, analysis,
- srare_cmd_id)
- # Create the rarefied artifact
- rarefied_biom_artifact_id = create_rarefied_biom_artifact(
- analysis, srare_cmd_id, biom_data, params,
- initial_biom_artifact_id, rarefaction_job_id,
- srare_cmd_out_id)
- else:
- # The BIOM table was not rarefied, use current table as initial
- initial_biom_id = transfer_file_to_artifact()
-
- # Loop through all the jobs that used this biom table as input
- sql = """SELECT *
- FROM qiita.job
- WHERE reverse(split_part(reverse(
- options::json->>'--otu_table_fp'), '/', 1)) = %s"""
- TRN.add(sql, [filepath])
- analysis_jobs = TRN.execute_fetchindex()
- for job_data in analysis_jobs:
- # Identify which command the current job exeucted
- if job_data['command_id'] == 1:
- # Taxa summaries
- cmd_id = sum_taxa_cmd_id
- params = ('{"biom_table":%d,"metadata_category":"",'
- '"sort":false}' % initial_biom_id)
- output_artifact_type_id = ts_atype_id
- cmd_out_id = sum_taxa_cmd_out_id
- elif job_data['command_id'] == 2:
- # Beta diversity
- cmd_id = bdiv_cmd_id
- tree_fp = loads(job_data['options'])['--tree_fp']
- if tree_fp:
- params = ('{"biom_table":%d,"tree":"%s","metrics":'
- '["unweighted_unifrac","weighted_unifrac"]}'
- % (initial_biom_id, tree_fp))
- else:
- params = ('{"biom_table":%d,"metrics":["bray_curtis",'
- '"gower","canberra","pearson"]}'
- % initial_biom_id)
- output_artifact_type_id = dm_atype_id
- cmd_out_id = bdiv_cmd_out_id
- else:
- # Alpha rarefaction
- cmd_id = arare_cmd_id
- tree_fp = loads(job_data['options'])['--tree_fp']
- params = ('{"biom_table":%d,"tree":"%s","num_steps":"10",'
- '"min_rare_depth":"10",'
- '"max_rare_depth":"Default"}'
- % (initial_biom_id, tree_fp))
- output_artifact_type_id = rc_atype_id
- cmd_out_id = arare_cmd_out_id
-
- transfer_job(analysis, cmd_id, params, initial_biom_id,
- job_data, cmd_out_id, biom_data,
- output_artifact_type_id)
-
-errors = []
-with TRN:
- # Unlink the analysis from the biom table filepaths
- # Magic number 7 -> biom filepath type
- sql = """DELETE FROM qiita.analysis_filepath
- WHERE filepath_id IN (SELECT filepath_id
- FROM qiita.filepath
- WHERE filepath_type_id = 7)"""
- TRN.add(sql)
- TRN.execute()
-
- # Delete old structures that are not used anymore
- tables = ["collection_job", "collection_analysis", "collection_users",
- "collection", "collection_status", "analysis_workflow",
- "analysis_chain", "analysis_job", "job_results_filepath", "job",
- "job_status", "command_data_type", "command", "analysis_status"]
- for table in tables:
- TRN.add("DROP TABLE qiita.%s" % table)
- try:
- TRN.execute()
- except Exception as e:
- errors.append("Error deleting table %s: %s" % (table, str(e)))
-
-# Purge filepaths
-try:
- purge_filepaths()
-except Exception as e:
- errors.append("Error purging filepaths: %s" % str(e))
-
-if errors:
- print "\n".join(errors)
+from qiita_db.study import Study
+
+
+class ForRecursion(object):
+ """for some strange reason, my guess is how we are executing the patches
+ recursion doesn't work directly so decided to use a class to make it
+ work"""
+
+ @classmethod
+ def change_status(cls, artifact, status):
+ for a in artifact.children:
+ try:
+ a.visibility = status
+            except Exception:
+                # print so we know which changes failed and can fix them
+                # by hand
+                print "failed aid: %d, status %s" % (a.id, status)
+ return
+ cls.change_status(a, status)
+
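+# A minimal sketch of the propagation, using hypothetical artifact ids: if
+# artifact 1 is a private root with children 2 and 3, and 3 has child 4,
+# then
+#
+#   ForRecursion.change_status(Artifact(1), 'private')
+#
+# sets the visibility of 2, 3 and 4 (but not 1 itself) to 'private'.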
+
+studies = Study.get_by_status('private').union(
+ Study.get_by_status('public')).union(Study.get_by_status('sandbox'))
+# just getting the base artifacts, no parents
+artifacts = {a for s in studies for a in s.artifacts() if not a.parents}
+
+# inheriting status
+fr = ForRecursion
+for a in artifacts:
+ status = a.visibility
+ fr.change_status(a, status)
diff --git a/qiita_db/support_files/patches/python_patches/48.py b/qiita_db/support_files/patches/python_patches/48.py
new file mode 100644
index 000000000..e831f80ba
--- /dev/null
+++ b/qiita_db/support_files/patches/python_patches/48.py
@@ -0,0 +1,56 @@
+# replacing all \t and \n with spaces, as those chars break QIIME
+
+from qiita_db.study import Study
+from qiita_db.sql_connection import TRN
+
+
+def searcher(df):
+ search = r"\t|\n"
+
+ return [col for col in df
+ if df[col].str.contains(search, na=False, regex=True).any()]
+
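+# A minimal usage sketch of searcher, with a hypothetical dataframe of
+# string columns:
+#
+#   df = pd.DataFrame({'env': ['soil', 'sea\twater'], 'ph': ['7', '8']})
+#   searcher(df)  # -> ['env'], the only column containing a tab/newline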
+
+studies = Study.get_by_status('private').union(
+ Study.get_by_status('public')).union(Study.get_by_status('sandbox'))
+
+# we will start the search using pandas, as it is much easier and faster
+# than doing it in pgsql. remember that to_dataframe actually transforms
+# what's in the db
+to_fix = []
+for s in studies:
+ st = s.sample_template
+ if st is None:
+ continue
+ cols = searcher(st.to_dataframe())
+ if cols:
+ to_fix.append((st, cols))
+
+ for pt in s.prep_templates():
+ if pt is None:
+ continue
+ cols = searcher(pt.to_dataframe())
+ if cols:
+ to_fix.append((pt, cols))
+
+
+# now let's fix the database and regenerate the files
+for infofile, cols in to_fix:
+ with TRN:
+ for col in cols:
+ # removing tabs
+ sql = """UPDATE qiita.{0}{1}
+ SET {2} = replace({2}, chr(9), ' ')""".format(
+ infofile._table_prefix, infofile.id, col)
+ TRN.add(sql)
+
+            # removing newlines
+ sql = """UPDATE qiita.{0}{1}
+ SET {2} = regexp_replace(
+ {2}, E'[\\n\\r\\u2028]+', ' ', 'g' )""".format(
+ infofile._table_prefix, infofile.id, col)
+ TRN.add(sql)
+
+ TRN.execute()
+
+ infofile.generate_files()
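+
+# For reference, with a hypothetical sample template of id 1 and an
+# offending column 'env', the first statement generated above would be:
+#
+#   UPDATE qiita.sample_1
+#       SET env = replace(env, chr(9), ' ')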
diff --git a/qiita_db/support_files/patches/python_patches/51.py b/qiita_db/support_files/patches/python_patches/51.py
new file mode 100644
index 000000000..43f1b65a9
--- /dev/null
+++ b/qiita_db/support_files/patches/python_patches/51.py
@@ -0,0 +1,688 @@
+# The code is commented with details on the changes implemented here,
+# but here is an overview of the changes needed to transfer the analysis
+# data to the plugins structure:
+# 1) Create a new type plugin to define the diversity types
+# 2) Create the new commands on the existing QIIME plugin to execute the
+# existing analyses (beta div, taxa summaries and alpha rarefaction)
+# 3) Transfer all the data in the old structures to the plugin structures
+# 4) Delete old structures
+
+from os.path import join, exists, basename
+from os import makedirs
+from json import loads
+
+from biom import load_table, Table
+from biom.util import biom_open
+
+from qiita_db.sql_connection import TRN
+from qiita_db.util import (get_db_files_base_dir, purge_filepaths,
+ get_mountpoint, compute_checksum)
+from qiita_db.artifact import Artifact
+
+# Create some aux functions that are going to make the code more modular
+# and easier to understand, since there is a fair amount of work to do to
+# transfer the data from the old structure to the new one
+
+
+def create_non_rarefied_biom_artifact(analysis, biom_data, rarefied_table):
+ """Creates the initial non-rarefied BIOM artifact of the analysis
+
+ Parameters
+ ----------
+ analysis : dict
+ Dictionary with the analysis information
+ biom_data : dict
+ Dictionary with the biom file information
+ rarefied_table : biom.Table
+ The rarefied BIOM table
+
+ Returns
+ -------
+ int
+ The id of the new artifact
+ """
+ # The non rarefied biom artifact is the initial biom table of the analysis.
+ # This table does not currently exist anywhere, so we need to actually
+ # create the BIOM file. To create this BIOM file we need: (1) the samples
+    # and artifacts they come from and (2) whether the samples were
+    # renamed or not. (1) is in the database, but we need to infer (2) from
+ # the existing rarefied BIOM table. Fun, fun...
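+    # For instance (hypothetical ids): a sample 'S1' taken from artifact 4
+    # would appear as '4.S1' in the rarefied table if the samples were
+    # renamed; so if the merged table's ids are not a superset of the
+    # rarefied table's ids, we apply ids_map to rename them.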
+
+ with TRN:
+ # Get the samples included in the BIOM table grouped by artifact id
+ # Note that the analysis contains a BIOM table per data type included
+ # in it, and the table analysis_sample does not differentiate between
+ # datatypes, so we need to check the data type in the artifact table
+ sql = """SELECT artifact_id, array_agg(sample_id)
+ FROM qiita.analysis_sample
+ JOIN qiita.artifact USING (artifact_id)
+ WHERE analysis_id = %s AND data_type_id = %s
+ GROUP BY artifact_id"""
+ TRN.add(sql, [analysis['analysis_id'], biom_data['data_type_id']])
+ samples_by_artifact = TRN.execute_fetchindex()
+
+ # Create an empty BIOM table to be the new master table
+ new_table = Table([], [], [])
+ ids_map = {}
+ for a_id, samples in samples_by_artifact:
+ # Get the filepath of the BIOM table from the artifact
+ artifact = Artifact(a_id)
+ biom_fp = None
+ for _, fp, fp_type in artifact.filepaths:
+ if fp_type == 'biom':
+ biom_fp = fp
+            # Note that the biom table is guaranteed to exist, so there is
+            # no need to check whether biom_fp is undefined
+ biom_table = load_table(biom_fp)
+ biom_table.filter(samples, axis='sample', inplace=True)
+ new_table = new_table.merge(biom_table)
+ ids_map.update({sid: "%d.%s" % (a_id, sid)
+ for sid in biom_table.ids()})
+
+ # Check if we need to rename the sample ids in the biom table
+ new_table_ids = set(new_table.ids())
+ if not new_table_ids.issuperset(rarefied_table.ids()):
+ # We need to rename the sample ids
+ new_table.update_ids(ids_map, 'sample', True, True)
+
+ sql = """INSERT INTO qiita.artifact
+ (generated_timestamp, data_type_id, visibility_id,
+ artifact_type_id, submitted_to_vamps)
+ VALUES (%s, %s, %s, %s, %s)
+ RETURNING artifact_id"""
+ # Magic number 4 -> visibility sandbox
+        # Magic number 7 -> biom artifact type
+ TRN.add(sql, [analysis['timestamp'], biom_data['data_type_id'],
+ 4, 7, False])
+ artifact_id = TRN.execute_fetchlast()
+ # Associate the artifact with the analysis
+ sql = """INSERT INTO qiita.analysis_artifact
+ (analysis_id, artifact_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [analysis['analysis_id'], artifact_id])
+ # Link the artifact with its file
+ dd_id, mp = get_mountpoint('BIOM')[0]
+ dir_fp = join(get_db_files_base_dir(), mp, str(artifact_id))
+ if not exists(dir_fp):
+ makedirs(dir_fp)
+ new_table_fp = join(dir_fp, "biom_table.biom")
+ with biom_open(new_table_fp, 'w') as f:
+ new_table.to_hdf5(f, "Generated by Qiita")
+
+ sql = """INSERT INTO qiita.filepath
+ (filepath, filepath_type_id, checksum,
+ checksum_algorithm_id, data_directory_id)
+ VALUES (%s, %s, %s, %s, %s)
+ RETURNING filepath_id"""
+ # Magic number 7 -> filepath_type_id = 'biom'
+ # Magic number 1 -> the checksum algorithm id
+ TRN.add(sql, [basename(new_table_fp), 7,
+ compute_checksum(new_table_fp), 1, dd_id])
+ fp_id = TRN.execute_fetchlast()
+ sql = """INSERT INTO qiita.artifact_filepath
+ (artifact_id, filepath_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [artifact_id, fp_id])
+ TRN.execute()
+
+ return artifact_id
+
+
+def create_rarefaction_job(depth, biom_artifact_id, analysis, srare_cmd_id):
+ """Create a new rarefaction job
+
+ Parameters
+ ----------
+ depth : int
+ The rarefaction depth
+ biom_artifact_id : int
+ The artifact id of the input rarefaction biom table
+ analysis : dict
+ Dictionary with the analysis information
+ srare_cmd_id : int
+ The command id of the single rarefaction command
+
+ Returns
+ -------
+ job_id : str
+ The job id
+ params : str
+ The job parameters
+ """
+    # Add the row in the processing job table
+ params = ('{"depth":%d,"subsample_multinomial":false,"biom_table":%s}'
+ % (depth, biom_artifact_id))
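+    # e.g. depth=1000 and biom_artifact_id=5 (hypothetical values) yield
+    # '{"depth":1000,"subsample_multinomial":false,"biom_table":5}'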
+ with TRN:
+ # magic number 3: status -> success
+ sql = """INSERT INTO qiita.processing_job
+ (email, command_id, command_parameters,
+ processing_job_status_id)
+ VALUES (%s, %s, %s, %s)
+ RETURNING processing_job_id"""
+ TRN.add(sql, [analysis['email'], srare_cmd_id, params, 3])
+ job_id = TRN.execute_fetchlast()
+ # Step 1.2.b: Link the job with the input artifact
+ sql = """INSERT INTO qiita.artifact_processing_job
+ (artifact_id, processing_job_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [biom_artifact_id, job_id])
+ TRN.execute()
+ return job_id, params
+
+
+def transfer_file_to_artifact(analysis_id, a_timestamp, command_id,
+ data_type_id, params, artifact_type_id,
+ filepath_id):
+ """Creates a new artifact with the given filepath id
+
+ Parameters
+ ----------
+ analysis_id : int
+ The analysis id to attach the artifact
+ a_timestamp : datetime.datetime
+ The generated timestamp of the artifact
+ command_id : int
+ The command id of the artifact
+ data_type_id : int
+ The data type id of the artifact
+ params : str
+ The parameters of the artifact
+ artifact_type_id : int
+ The artifact type
+ filepath_id : int
+ The filepath id
+
+ Returns
+ -------
+ int
+ The artifact id
+ """
+ with TRN:
+ # Add the row in the artifact table
+ # Magic number 4: Visibility -> sandbox
+ sql = """INSERT INTO qiita.artifact
+ (generated_timestamp, command_id, data_type_id,
+ command_parameters, visibility_id, artifact_type_id,
+ submitted_to_vamps)
+ VALUES (%s, %s, %s, %s, %s, %s, %s)
+ RETURNING artifact_id"""
+ TRN.add(sql, [a_timestamp, command_id, data_type_id, params, 4,
+ artifact_type_id, False])
+ artifact_id = TRN.execute_fetchlast()
+ # Link the artifact with its file
+ sql = """INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [artifact_id, filepath_id])
+ # Link the artifact with the analysis
+ sql = """INSERT INTO qiita.analysis_artifact
+ (analysis_id, artifact_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [analysis_id, artifact_id])
+
+ return artifact_id
+
+
+def create_rarefied_biom_artifact(analysis, srare_cmd_id, biom_data, params,
+ parent_biom_artifact_id, rarefaction_job_id,
+ srare_cmd_out_id):
+ """Creates the rarefied biom artifact
+
+ Parameters
+ ----------
+ analysis : dict
+ The analysis information
+ srare_cmd_id : int
+ The command id of "Single Rarefaction"
+ biom_data : dict
+ The biom information
+ params : str
+ The processing parameters
+ parent_biom_artifact_id : int
+ The parent biom artifact id
+ rarefaction_job_id : str
+ The job id of the rarefaction job
+ srare_cmd_out_id : int
+ The id of the single rarefaction output
+
+ Returns
+ -------
+ int
+ The artifact id
+ """
+ with TRN:
+ # Transfer the file to an artifact
+ # Magic number 7: artifact type -> biom
+ artifact_id = transfer_file_to_artifact(
+ analysis['analysis_id'], analysis['timestamp'], srare_cmd_id,
+ biom_data['data_type_id'], params, 7, biom_data['filepath_id'])
+ # Link the artifact with its parent
+ sql = """INSERT INTO qiita.parent_artifact (artifact_id, parent_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [artifact_id, parent_biom_artifact_id])
+ # Link the artifact as the job output
+ sql = """INSERT INTO qiita.artifact_output_processing_job
+ (artifact_id, processing_job_id, command_output_id)
+ VALUES (%s, %s, %s)"""
+ TRN.add(sql, [artifact_id, rarefaction_job_id, srare_cmd_out_id])
+ return artifact_id
+
+
+def transfer_job(analysis, command_id, params, input_artifact_id, job_data,
+ cmd_out_id, biom_data, output_artifact_type_id):
+ """Transfers the job from the old structure to the plugin structure
+
+ Parameters
+ ----------
+ analysis : dict
+ The analysis information
+ command_id : int
+ The id of the command executed
+ params : str
+ The parameters used in the job
+ input_artifact_id : int
+ The id of the input artifact
+ job_data : dict
+ The job information
+ cmd_out_id : int
+ The id of the command's output
+ biom_data : dict
+ The biom information
+ output_artifact_type_id : int
+ The type of the output artifact
+ """
+ with TRN:
+ # Create the job
+ # Add the row in the processing job table
+ # Magic number 3: status -> success
+ sql = """INSERT INTO qiita.processing_job
+ (email, command_id, command_parameters,
+ processing_job_status_id)
+ VALUES (%s, %s, %s, %s)
+ RETURNING processing_job_id"""
+ TRN.add(sql, [analysis['email'], command_id, params, 3])
+ job_id = TRN.execute_fetchlast()
+
+ # Link the job with the input artifact
+ sql = """INSERT INTO qiita.artifact_processing_job
+ (artifact_id, processing_job_id)
+ VALUES (rarefied_biom_id, proc_job_id)"""
+ TRN.add(sql, [input_artifact_id, job_id])
+
+ # Check if the executed job has results and add them
+ sql = """SELECT EXISTS(SELECT *
+ FROM qiita.job_results_filepath
+ WHERE job_id = %s)"""
+ TRN.add(sql, [job_data['job_id']])
+ if TRN.execute_fetchlast():
+ # There are results for the current job.
+ # Transfer the job files to a new artifact
+ sql = """SELECT filepath_id
+ FROM qiita.job_results_filepath
+ WHERE job_id = %s"""
+            TRN.add(sql, [job_data['job_id']])
+ filepath_id = TRN.execute_fetchlast()
+ artifact_id = transfer_file_to_artifact(
+ analysis['analysis_id'], analysis['timestamp'], command_id,
+ biom_data['data_type_id'], params, output_artifact_type_id,
+ filepath_id)
+
+ # Link the artifact with its parent
+ sql = """INSERT INTO qiita.parent_artifact (artifact_id, parent_id)
+ VALUES (%s, %s)"""
+ TRN.add(sql, [artifact_id, input_artifact_id])
+ # Link the artifact as the job output
+ sql = """INSERT INTO qiita.artifact_output_processing_job
+ (artifact_id, processing_job_id, command_output_id)
+ VALUES (%s, %s, %s)"""
+ TRN.add(sql, [artifact_id, job_id, cmd_out_id])
+            TRN.execute()
+ else:
+ # There are no results on the current job, so mark it as
+ # error
+            if job_data['log_id'] is None:
+ # Magic number 2 - we are not using any other severity
+ # level, so keep using number 2
+ sql = """INSERT INTO qiita.logging (time, severity_id, msg)
+ VALUES (%s, %s, %s)
+ RETURNING logging_id"""
+                TRN.add(sql, [analysis['timestamp'], 2,
+                              "Unknown error - patch 51"])
+                log_id = TRN.execute_fetchlast()
+ else:
+ log_id = job_data['log_id']
+
+ # Magic number 4 -> status -> error
+ sql = """UPDATE qiita.processing_job
+ SET processing_job_status_id = 4, logging_id = %s
+ WHERE processing_job_id = %s"""
+ TRN.add(sql, [log_id, job_id])
+
+
+# The new commands that we are going to add generate new artifact types.
+# These new artifact types are going to be added to a different plugin.
+# In interest of time and given that the artifact type system is going to
+# change in the near future, we feel that the easiest way to transfer
+# the current analyses results is by creating 3 different types of
+# artifacts: (1) distance matrix -> which will include the distance matrix,
+# the principal coordinates and the emperor plots; (2) rarefaction
+# curves -> which will include all the files generated by alpha rarefaction
+# and (3) taxonomy summary, which will include all the files generated
+# by summarize_taxa_through_plots.py
+
+# Step 1: Create the new type
+with TRN:
+ # Magic number 2 -> The "artifact definition" software type
+ sql = """INSERT INTO qiita.software
+ (name, version, description, environment_script, start_script,
+ software_type_id)
+ VALUES ('Diversity types', '0.1.0',
+ 'Diversity artifacts type plugin',
+ 'source activate qiita', 'start_diversity_types', 2)
+ RETURNING software_id"""
+ TRN.add(sql)
+ divtype_id = TRN.execute_fetchlast()
+
+ # Step 2: Create the validate and HTML generator commands
+ sql = """INSERT INTO qiita.software_command (software_id, name, description)
+ VALUES (%s, %s, %s)
+ RETURNING command_id"""
+ TRN.add(sql, [divtype_id, 'Validate',
+ 'Validates a new artifact of the given diversity type'])
+ validate_cmd_id = TRN.execute_fetchlast()
+ TRN.add(sql, [divtype_id, 'Generate HTML summary',
+ 'Generates the HTML summary of a given diversity type'])
+ html_summary_cmd_id = TRN.execute_fetchlast()
+
+ # Step 3: Add the parameters for the previous commands
+ sql = """INSERT INTO qiita.command_parameter
+ (command_id, parameter_name, parameter_type, required)
+ VALUES (%s, %s, %s, %s)"""
+ sql_args = [(validate_cmd_id, 'files', 'string', True),
+ (validate_cmd_id, 'artifact_type', 'string', True),
+ (html_summary_cmd_id, 'input_data', 'artifact', True)]
+ TRN.add(sql, sql_args, many=True)
+
+ # Step 4: Add the new artifact types
+ sql = """INSERT INTO qiita.artifact_type (
+ artifact_type, description, can_be_submitted_to_ebi,
+ can_be_submitted_to_vamps)
+ VALUES (%s, %s, %s, %s)
+ RETURNING artifact_type_id"""
+ TRN.add(sql, ['distance_matrix', 'Distance matrix holding pairwise '
+ 'distance between samples', False, False])
+ dm_atype_id = TRN.execute_fetchlast()
+ TRN.add(sql, ['rarefaction_curves', 'Rarefaction curves', False, False])
+ rc_atype_id = TRN.execute_fetchlast()
+ TRN.add(sql, ['taxa_summary', 'Taxa summary plots', False, False])
+ ts_atype_id = TRN.execute_fetchlast()
+
+ # Step 5: Associate each artifact with the filetypes that it accepts
+ # At this time we are going to add them as directories, just as it is done
+ # right now. We can make it fancier with the new type system.
+ # Magic number 8: the filepath_type_id for the directory
+ sql = """INSERT INTO qiita.artifact_type_filepath_type
+ (artifact_type_id, filepath_type_id, required)
+ VALUES (%s, %s, %s)"""
+ sql_args = [[dm_atype_id, 8, True],
+ [rc_atype_id, 8, True],
+ [ts_atype_id, 8, True]]
+ TRN.add(sql, sql_args, many=True)
+
+ # Step 6: Associate the plugin with the types that it defines
+ sql = """INSERT INTO qiita.software_artifact_type
+ (software_id, artifact_type_id)
+ VALUES (%s, %s)"""
+ sql_args = [[divtype_id, dm_atype_id],
+ [divtype_id, rc_atype_id],
+ [divtype_id, ts_atype_id]]
+ TRN.add(sql, sql_args, many=True)
+
+ # Step 7: Create the new entries for the data directory
+ sql = """INSERT INTO qiita.data_directory
+ (data_type, mountpoint, subdirectory, active)
+ VALUES (%s, %s, %s, %s)"""
+ sql_args = [['distance_matrix', 'distance_matrix', True, True],
+ ['rarefaction_curves', 'rarefaction_curves', True, True],
+ ['taxa_summary', 'taxa_summary', True, True]]
+ TRN.add(sql, sql_args, many=True)
+
+ # Create the new commands that execute the current analyses. In qiita,
+    # the only commands that were available are Summarize Taxa, Beta
+ # Diversity and Alpha Rarefaction. The system was executing rarefaction
+ # by default, but it should be a different step in the analysis process
+ # so we are going to create a command for it too. These commands are going
+ # to be part of the QIIME plugin, so we are going to first retrieve the
+    # id of the QIIME 1.9.1 plugin, which is guaranteed to exist because
+    # it was added in patch 33 and there is no way of removing plugins
+
+ # Step 1: Get the QIIME plugin id
+ sql = """SELECT software_id
+ FROM qiita.software
+ WHERE name = 'QIIME' AND version = '1.9.1'"""
+ TRN.add(sql)
+ qiime_id = TRN.execute_fetchlast()
+
+ # Step 2: Insert the new commands in the software_command table
+ sql = """INSERT INTO qiita.software_command (software_id, name, description)
+ VALUES (%s, %s, %s)
+ RETURNING command_id"""
+ TRN.add(sql, [qiime_id, 'Summarize Taxa', 'Plots taxonomy summaries at '
+ 'different taxonomy levels'])
+ sum_taxa_cmd_id = TRN.execute_fetchlast()
+ TRN.add(sql, [qiime_id, 'Beta Diversity',
+ 'Computes and plots beta diversity results'])
+ bdiv_cmd_id = TRN.execute_fetchlast()
+ TRN.add(sql, [qiime_id, 'Alpha Rarefaction',
+ 'Computes and plots alpha rarefaction results'])
+ arare_cmd_id = TRN.execute_fetchlast()
+ TRN.add(sql, [qiime_id, 'Single Rarefaction',
+ 'Rarefies the input table by random sampling without '
+ 'replacement'])
+ srare_cmd_id = TRN.execute_fetchlast()
+
+ # Step 3: Insert the parameters for each command
+ sql = """INSERT INTO qiita.command_parameter
+ (command_id, parameter_name, parameter_type, required,
+ default_value)
+ VALUES (%s, %s, %s, %s, %s)
+ RETURNING command_parameter_id"""
+ sql_args = [
+ # Summarize Taxa
+ (sum_taxa_cmd_id, 'metadata_category', 'string', False, ''),
+ (sum_taxa_cmd_id, 'sort', 'bool', False, 'False'),
+ # Beta Diversity
+ (bdiv_cmd_id, 'tree', 'string', False, ''),
+ (bdiv_cmd_id, 'metrics',
+ 'mchoice:["abund_jaccard","binary_chisq","binary_chord",'
+ '"binary_euclidean","binary_hamming","binary_jaccard",'
+ '"binary_lennon","binary_ochiai","binary_otu_gain","binary_pearson",'
+ '"binary_sorensen_dice","bray_curtis","bray_curtis_faith",'
+ '"bray_curtis_magurran","canberra","chisq","chord","euclidean",'
+ '"gower","hellinger","kulczynski","manhattan","morisita_horn",'
+ '"pearson","soergel","spearman_approx","specprof","unifrac",'
+ '"unifrac_g","unifrac_g_full_tree","unweighted_unifrac",'
+ '"unweighted_unifrac_full_tree","weighted_normalized_unifrac",'
+ '"weighted_unifrac"]', False, '["binary_jaccard","bray_curtis"]'),
+ # Alpha rarefaction
+ (arare_cmd_id, 'tree', 'string', False, ''),
+ (arare_cmd_id, 'num_steps', 'integer', False, 10),
+ (arare_cmd_id, 'min_rare_depth', 'integer', False, 10),
+ (arare_cmd_id, 'max_rare_depth', 'integer', False, 'Default'),
+ # Single rarefaction
+ (srare_cmd_id, 'depth', 'integer', True, None),
+ (srare_cmd_id, 'subsample_multinomial', 'bool', False, 'False')
+ ]
+ TRN.add(sql, sql_args, many=True)
+
+ TRN.add(sql, [sum_taxa_cmd_id, 'biom_table', 'artifact', True, None])
+ sum_taxa_cmd_param_id = TRN.execute_fetchlast()
+ TRN.add(sql, [bdiv_cmd_id, 'biom_table', 'artifact', True, None])
+ bdiv_cmd_param_id = TRN.execute_fetchlast()
+ TRN.add(sql, [arare_cmd_id, 'biom_table', 'artifact', True, None])
+ arare_cmd_param_id = TRN.execute_fetchlast()
+ TRN.add(sql, [srare_cmd_id, 'biom_table', 'artifact', True, None])
+ srare_cmd_param_id = TRN.execute_fetchlast()
+
+ # Step 4: Connect the artifact parameters with the artifact types that
+ # they accept
+ sql = """SELECT artifact_type_id
+ FROM qiita.artifact_type
+ WHERE artifact_type = 'BIOM'"""
+ TRN.add(sql)
+ biom_atype_id = TRN.execute_fetchlast()
+
+ sql = """INSERT INTO qiita.parameter_artifact_type
+ (command_parameter_id, artifact_type_id)
+ VALUES (%s, %s)"""
+ sql_args = [[sum_taxa_cmd_param_id, biom_atype_id],
+ [bdiv_cmd_param_id, biom_atype_id],
+ [arare_cmd_param_id, biom_atype_id],
+ [srare_cmd_param_id, biom_atype_id]]
+ TRN.add(sql, sql_args, many=True)
+
+ # Step 5: Add the outputs of the command.
+ sql = """INSERT INTO qiita.command_output
+ (name, command_id, artifact_type_id)
+ VALUES (%s, %s, %s)
+ RETURNING command_output_id"""
+ TRN.add(sql, ['taxa_summary', sum_taxa_cmd_id, ts_atype_id])
+ sum_taxa_cmd_out_id = TRN.execute_fetchlast()
+ TRN.add(sql, ['distance_matrix', bdiv_cmd_id, dm_atype_id])
+ bdiv_cmd_out_id = TRN.execute_fetchlast()
+ TRN.add(sql, ['rarefaction_curves', arare_cmd_id, rc_atype_id])
+ arare_cmd_out_id = TRN.execute_fetchlast()
+ TRN.add(sql, ['rarefied_table', srare_cmd_id, biom_atype_id])
+ srare_cmd_out_id = TRN.execute_fetchlast()
+
+# At this point we are ready to start transferring the data from the old
+# structures to the new structures. Overview of the procedure:
+# Step 1: Add initial set of artifacts up to rarefied table
+# Step 2: Transfer the "analisys jobs" to processing jobs and create
+# the analysis artifacts
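+# Rarefaction is detected by summing each sample's counts: a rarefied table
+# has a single distinct depth. A minimal sketch with a hypothetical 2x2
+# biom.Table (np is numpy; columns are samples):
+#
+#   t = Table(np.array([[2, 3], [8, 7]]), ['O1', 'O2'], ['S1', 'S2'])
+#   set(t.sum(axis='sample'))  # -> {10.0}: a single depth, i.e. rarefied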
+db_dir = get_db_files_base_dir()
+with TRN:
+ sql = "SELECT * FROM qiita.analysis"
+ TRN.add(sql)
+ analysis_info = TRN.execute_fetchindex()
+
+ # Loop through all the analysis
+ for analysis in analysis_info:
+        # Step 1: Add the initial set of artifacts. An analysis starts with
+ # a set of BIOM artifacts.
+ sql = """SELECT *
+ FROM qiita.analysis_filepath
+ JOIN qiita.filepath USING (filepath_id)
+ JOIN qiita.filepath_type USING (filepath_type_id)
+ WHERE analysis_id = %s AND filepath_type = 'biom'"""
+ TRN.add(sql, [analysis['analysis_id']])
+ analysis_bioms = TRN.execute_fetchindex()
+
+ # Loop through all the biom tables associated with the current analysis
+ # so we can create the initial set of artifacts
+ for biom_data in analysis_bioms:
+ # Get the path of the BIOM table
+ sql = """SELECT filepath, mountpoint
+ FROM qiita.filepath
+ JOIN qiita.data_directory USING (data_directory_id)
+ WHERE filepath_id = %s"""
+ TRN.add(sql, [biom_data['filepath_id']])
+ # Magic number 0: There is only a single row in the query result
+ fp_info = TRN.execute_fetchindex()[0]
+ filepath = join(db_dir, fp_info['mountpoint'], fp_info['filepath'])
+
+ # We need to check if the BIOM table has been rarefied or not
+ table = load_table(filepath)
+ depths = set(table.sum(axis='sample'))
+ if len(depths) == 1:
+ # The BIOM table was rarefied
+ # Create the initial unrarefied artifact
+ initial_biom_artifact_id = create_non_rarefied_biom_artifact(
+ analysis, biom_data, table)
+ # Create the rarefaction job
+ rarefaction_job_id, params = create_rarefaction_job(
+ depths.pop(), initial_biom_artifact_id, analysis,
+ srare_cmd_id)
+ # Create the rarefied artifact
+ rarefied_biom_artifact_id = create_rarefied_biom_artifact(
+ analysis, srare_cmd_id, biom_data, params,
+ initial_biom_artifact_id, rarefaction_job_id,
+ srare_cmd_out_id)
+            else:
+                # The BIOM table was not rarefied; use the existing file as
+                # the initial artifact, with no generating command or
+                # parameters. Magic number 7 -> biom artifact type
+                initial_biom_id = transfer_file_to_artifact(
+                    analysis['analysis_id'], analysis['timestamp'], None,
+                    biom_data['data_type_id'], None, 7,
+                    biom_data['filepath_id'])
+
+ # Loop through all the jobs that used this biom table as input
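+        # reverse(split_part(reverse(x), '/', 1)) extracts the basename of
+        # the job's --otu_table_fp option, e.g. a hypothetical
+        # '/mnt/BIOM/5/otu_table.biom' becomes 'otu_table.biom'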
+ sql = """SELECT *
+ FROM qiita.job
+ WHERE reverse(split_part(reverse(
+ options::json->>'--otu_table_fp'), '/', 1)) = %s"""
+ TRN.add(sql, [filepath])
+ analysis_jobs = TRN.execute_fetchindex()
+ for job_data in analysis_jobs:
+            # Identify which command the current job executed
+ if job_data['command_id'] == 1:
+ # Taxa summaries
+ cmd_id = sum_taxa_cmd_id
+ params = ('{"biom_table":%d,"metadata_category":"",'
+ '"sort":false}' % initial_biom_id)
+ output_artifact_type_id = ts_atype_id
+ cmd_out_id = sum_taxa_cmd_out_id
+ elif job_data['command_id'] == 2:
+ # Beta diversity
+ cmd_id = bdiv_cmd_id
+ tree_fp = loads(job_data['options'])['--tree_fp']
+ if tree_fp:
+ params = ('{"biom_table":%d,"tree":"%s","metrics":'
+ '["unweighted_unifrac","weighted_unifrac"]}'
+ % (initial_biom_id, tree_fp))
+ else:
+ params = ('{"biom_table":%d,"metrics":["bray_curtis",'
+ '"gower","canberra","pearson"]}'
+ % initial_biom_id)
+ output_artifact_type_id = dm_atype_id
+ cmd_out_id = bdiv_cmd_out_id
+ else:
+ # Alpha rarefaction
+ cmd_id = arare_cmd_id
+ tree_fp = loads(job_data['options'])['--tree_fp']
+ params = ('{"biom_table":%d,"tree":"%s","num_steps":"10",'
+ '"min_rare_depth":"10",'
+ '"max_rare_depth":"Default"}'
+ % (initial_biom_id, tree_fp))
+ output_artifact_type_id = rc_atype_id
+ cmd_out_id = arare_cmd_out_id
+
+ transfer_job(analysis, cmd_id, params, initial_biom_id,
+ job_data, cmd_out_id, biom_data,
+ output_artifact_type_id)
+
+errors = []
+with TRN:
+ # Unlink the analysis from the biom table filepaths
+ # Magic number 7 -> biom filepath type
+ sql = """DELETE FROM qiita.analysis_filepath
+ WHERE filepath_id IN (SELECT filepath_id
+ FROM qiita.filepath
+ WHERE filepath_type_id = 7)"""
+ TRN.add(sql)
+ TRN.execute()
+
+ # Delete old structures that are not used anymore
+ tables = ["collection_job", "collection_analysis", "collection_users",
+ "collection", "collection_status", "analysis_workflow",
+ "analysis_chain", "analysis_job", "job_results_filepath", "job",
+ "job_status", "command_data_type", "command", "analysis_status"]
+ for table in tables:
+ TRN.add("DROP TABLE qiita.%s" % table)
+ try:
+ TRN.execute()
+ except Exception as e:
+ errors.append("Error deleting table %s: %s" % (table, str(e)))
+
+# Purge filepaths
+try:
+ purge_filepaths()
+except Exception as e:
+ errors.append("Error purging filepaths: %s" % str(e))
+
+if errors:
+ print "\n".join(errors)
diff --git a/qiita_db/support_files/populate_test_db.sql b/qiita_db/support_files/populate_test_db.sql
index a04051ba4..24f46b77a 100644
--- a/qiita_db/support_files/populate_test_db.sql
+++ b/qiita_db/support_files/populate_test_db.sql
@@ -11,7 +11,7 @@ INSERT INTO qiita.qiita_user (email, user_level_id, password, name,
'$2a$12$gnUi8Qg.0tvW243v889BhOBhWLIHyIJjjgaG6dxuRJkUM8nXG9Efe', 'Dude',
'Nowhere University', '123 fake st, Apt 0, Faketown, CO 80302',
'111-222-3344'),
- ('shared@foo.bar', 3,
+ ('shared@foo.bar', 4,
'$2a$12$gnUi8Qg.0tvW243v889BhOBhWLIHyIJjjgaG6dxuRJkUM8nXG9Efe', 'Shared',
'Nowhere University', '123 fake st, Apt 0, Faketown, CO 80302',
'111-222-3344'),
diff --git a/qiita_db/support_files/qiita-db.dbs b/qiita_db/support_files/qiita-db.dbs
index 6e65c748b..ca22db34c 100644
--- a/qiita_db/support_files/qiita-db.dbs
+++ b/qiita_db/support_files/qiita-db.dbs
@@ -863,6 +863,26 @@
What portals are available to show a study in
@@ -1491,6 +1511,23 @@ Controlled Vocabulary]]>