39 changes: 14 additions & 25 deletions qiita_db/analysis.py
@@ -655,23 +655,19 @@ def build_files(self, rarefaction_depth=None):
         if rarefaction_depth <= 0:
             raise ValueError("rarefaction_depth must be greater than 0")
 
-        conn_handler = SQLConnectionHandler()
-        samples = self._get_samples(conn_handler=conn_handler)
-        self._build_mapping_file(samples, conn_handler=conn_handler)
-        self._build_biom_tables(samples, rarefaction_depth,
-                                conn_handler=conn_handler)
+        samples = self._get_samples()
+        self._build_mapping_file(samples)
+        self._build_biom_tables(samples, rarefaction_depth)
 
-    def _get_samples(self, conn_handler=None):
+    def _get_samples(self):
         """Retrieves dict of samples to proc_data_id for the analysis"""
-        conn_handler = conn_handler if conn_handler is not None \
-            else SQLConnectionHandler()
+        conn_handler = SQLConnectionHandler()
         sql = ("SELECT processed_data_id, array_agg(sample_id ORDER BY "
                "sample_id) FROM qiita.analysis_sample WHERE analysis_id = %s "
                "GROUP BY processed_data_id")
         return dict(conn_handler.execute_fetchall(sql, [self._id]))
 
-    def _build_biom_tables(self, samples, rarefaction_depth,
-                           conn_handler=None):
+    def _build_biom_tables(self, samples, rarefaction_depth):
         """Build tables and add them to the analysis"""
         # filter and combine all study BIOM tables needed for each data type
         new_tables = {dt: None for dt in self.data_types}
@@ -701,8 +697,6 @@ def _build_biom_tables(self, samples, rarefaction_depth,
                     new_tables[data_type] = new_tables[data_type].merge(table)
 
         # add the new tables to the analysis
-        conn_handler = conn_handler if conn_handler is not None \
-            else SQLConnectionHandler()
         _, base_fp = get_mountpoint(self._table)[0]
         for dt, biom_table in viewitems(new_tables):
             # rarefy, if specified
@@ -714,14 +708,12 @@
                 biom_table.to_hdf5(f, "Analysis %s Datatype %s" %
                                    (self._id, dt))
             self._add_file("%d_analysis_%s.biom" % (self._id, dt),
-                           "biom", data_type=dt, conn_handler=conn_handler)
+                           "biom", data_type=dt)
 
-    def _build_mapping_file(self, samples, conn_handler=None):
+    def _build_mapping_file(self, samples):
         """Builds the combined mapping file for all samples
         Code modified slightly from qiime.util.MetadataMap.__add__"""
-        conn_handler = conn_handler if conn_handler is not None \
-            else SQLConnectionHandler()
-
+        conn_handler = SQLConnectionHandler()
         all_sample_ids = set()
         sql = """SELECT filepath_id, filepath
                  FROM qiita.filepath
@@ -777,10 +769,9 @@ def _build_mapping_file(self, samples, conn_handler=None):
         merged_map.to_csv(mapping_fp, index_label='#SampleID',
                           na_rep='unknown', sep='\t')
 
-        self._add_file("%d_analysis_mapping.txt" % self._id,
-                       "plain_text", conn_handler=conn_handler)
+        self._add_file("%d_analysis_mapping.txt" % self._id, "plain_text")
 
-    def _add_file(self, filename, filetype, data_type=None, conn_handler=None):
+    def _add_file(self, filename, filetype, data_type=None):
         """adds analysis item to database
 
         Parameters
@@ -789,13 +780,11 @@ def _add_file(self, filename, filetype, data_type=None, conn_handler=None):
             filename to add to analysis
         filetype : {plain_text, biom}
         data_type : str, optional
-        conn_handler : SQLConnectionHandler object, optional
         """
-        conn_handler = conn_handler if conn_handler is not None \
-            else SQLConnectionHandler()
+        conn_handler = SQLConnectionHandler()
 
-        filetype_id = convert_to_id(filetype, 'filepath_type', conn_handler)
-        _, mp = get_mountpoint('analysis', conn_handler)[0]
+        filetype_id = convert_to_id(filetype, 'filepath_type')
+        _, mp = get_mountpoint('analysis')[0]
         fpid = insert_filepaths([
             (join(mp, filename), filetype_id)], -1, 'analysis', 'filepath',
             conn_handler, move_files=False)[0]
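
Every hunk in this file applies the same mechanical change: drop the optional conn_handler parameter and let each method construct its own handler. Below is a minimal sketch of the before/after shape, assuming handler construction is cheap; the SQLConnectionHandler stub is hypothetical (Qiita's real class wraps a PostgreSQL connection).

class SQLConnectionHandler(object):
    # Hypothetical stand-in for qiita_db's handler, which manages a
    # PostgreSQL connection. The refactor assumes construction is cheap
    # enough that building one per method call is acceptable.
    def execute_fetchall(self, sql, sql_args):
        print("would execute: %s with %s" % (sql, sql_args))
        return []

# Before: an optional handler threaded through every signature.
def get_samples_before(analysis_id, conn_handler=None):
    conn_handler = conn_handler if conn_handler is not None \
        else SQLConnectionHandler()
    return dict(conn_handler.execute_fetchall(
        "SELECT processed_data_id, array_agg(sample_id) FROM "
        "qiita.analysis_sample WHERE analysis_id = %s "
        "GROUP BY processed_data_id", [analysis_id]))

# After: the handler is a local detail; the signature shrinks and a
# caller can no longer hand in a stale or mismatched connection.
def get_samples_after(analysis_id):
    conn_handler = SQLConnectionHandler()
    return dict(conn_handler.execute_fetchall(
        "SELECT processed_data_id, array_agg(sample_id) FROM "
        "qiita.analysis_sample WHERE analysis_id = %s "
        "GROUP BY processed_data_id", [analysis_id]))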
15 changes: 5 additions & 10 deletions qiita_db/base.py
@@ -113,15 +113,13 @@ def _check_subclass(cls):
         raise IncompetentQiitaDeveloperError(
             "Could not instantiate an object of the base class")
 
-    def _check_id(self, id_, conn_handler=None):
+    def _check_id(self, id_):
         r"""Check that the provided ID actually exists on the database
 
         Parameters
         ----------
         id_ : object
             The ID to test
-        conn_handler : SQLConnectionHandler
-            The connection handler object connected to the DB
 
         Notes
         -----
@@ -132,8 +130,7 @@ def _check_id(self, id_, conn_handler=None):
         """
         self._check_subclass()
 
-        conn_handler = (conn_handler if conn_handler is not None
-                        else SQLConnectionHandler())
+        conn_handler = SQLConnectionHandler()
 
         return conn_handler.execute_fetchone(
             "SELECT EXISTS(SELECT * FROM qiita.{0} WHERE "
@@ -229,7 +226,7 @@ def status(self, status):
             "(SELECT {0}_status_id FROM qiita.{0}_status WHERE status = %s) "
             "WHERE {0}_id = %s".format(self._table), (status, self._id))
 
-    def check_status(self, status, exclude=False, conn_handler=None):
+    def check_status(self, status, exclude=False):
         r"""Checks status of object.
 
         Parameters
@@ -239,8 +236,6 @@ def check_status(self, status, exclude=False, conn_handler=None):
         exclude: bool, optional
             If True, will check that database status is NOT one of the statuses
             passed. Default False.
-        conn_handler: SQLConnectionHandler, optional
-            The connection handler object connected to the DB
 
         Returns
         -------
@@ -265,8 +260,8 @@ def check_status(self, status, exclude=False, conn_handler=None):
         self._check_subclass()
 
         # Get all available statuses
-        conn_handler = (conn_handler if conn_handler is not None
-                        else SQLConnectionHandler())
+        conn_handler = SQLConnectionHandler()
+
         statuses = [x[0] for x in conn_handler.execute_fetchall(
             "SELECT DISTINCT status FROM qiita.{0}_status".format(self._table),
             (self._id, ))]
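
For orientation, the body of check_status (unchanged here apart from the handler) boils down to a membership test against the statuses stored in the database. The function below is a rough, self-contained distillation of that logic, not Qiita's actual API; the available list plays the role of the SELECT DISTINCT status query result.

def check_status(current_status, statuses, available, exclude=False):
    # Validate the requested statuses against what the DB knows about,
    # then test membership (or non-membership) of the current status.
    unknown = set(statuses) - set(available)
    if unknown:
        raise ValueError("Unknown status(es): %s"
                         % ", ".join(sorted(unknown)))
    if exclude:
        return current_status not in statuses
    return current_status in statuses

# With exclude=True the test inverts: a 'private' object checked
# against {'public'} passes.
print(check_status("private", {"public"}, ["public", "private"],
                   exclude=True))  # True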
41 changes: 17 additions & 24 deletions qiita_db/data.py
@@ -139,15 +139,15 @@ def _link_data_filepaths(self, fp_ids, conn_handler):
                 "VALUES (%s, %s)".format(self._data_filepath_table,
                                          self._data_filepath_column), values)
 
-    def add_filepaths(self, filepaths, conn_handler=None):
+    def add_filepaths(self, filepaths):
         r"""Populates the DB tables for storing the filepaths and connects the
         `self` objects with these filepaths"""
         # Check that this function has been called from a subclass
         self._check_subclass()
 
         # Check if the connection handler has been provided. Create a new
         # one if not.
-        conn_handler = conn_handler if conn_handler else SQLConnectionHandler()
+        conn_handler = SQLConnectionHandler()
 
         # Update the status of the current object
         self._set_link_filepaths_status("linking")
@@ -193,11 +193,11 @@ def get_filepaths(self):
             self._data_filepath_table,
             self._data_filepath_column), {'id': self.id})
 
-        _, fb = get_mountpoint(self._table, conn_handler)[0]
+        _, fb = get_mountpoint(self._table)[0]
         base_fp = partial(join, fb)
 
-        return [(fpid, base_fp(fp), convert_from_id(fid, "filepath_type",
-                conn_handler)) for fpid, fp, fid in db_paths]
+        return [(fpid, base_fp(fp), convert_from_id(fid, "filepath_type"))
+                for fpid, fp, fid in db_paths]
 
     def get_filepath_ids(self):
         self._check_subclass()
@@ -326,7 +326,7 @@ def create(cls, filetype, studies, filepaths=None):
 
         # If file paths have been provided, add them to the raw data object
         if filepaths:
-            rd.add_filepaths(filepaths, conn_handler)
+            rd.add_filepaths(filepaths)
 
         return rd
 
@@ -465,20 +465,15 @@ def prep_templates(self):
                "WHERE raw_data_id = %s ORDER BY prep_template_id")
         return [x[0] for x in conn_handler.execute_fetchall(sql, (self._id,))]
 
-    def _is_preprocessed(self, conn_handler=None):
+    def _is_preprocessed(self):
         """Returns whether the RawData has been preprocessed or not
 
-        Parameters
-        ----------
-        conn_handler : SQLConnectionHandler
-            The connection handler object connected to the DB
-
         Returns
         -------
         bool
             whether the RawData has been preprocessed or not
         """
-        conn_handler = conn_handler if conn_handler else SQLConnectionHandler()
+        conn_handler = SQLConnectionHandler()
         return conn_handler.execute_fetchone(
             "SELECT EXISTS(SELECT * FROM qiita.prep_template_preprocessed_data"
             " PTPD JOIN qiita.prep_template PT ON PT.prep_template_id = "
@@ -507,7 +502,7 @@ def _remove_filepath(self, fp, conn_handler, queue):
         """
         # If the RawData has been already preprocessed, we cannot remove any
         # file - raise an error
-        if self._is_preprocessed(conn_handler):
+        if self._is_preprocessed():
             msg = ("Cannot clear all the filepaths from raw data %s, it has "
                    "been already preprocessed" % self._id)
             self._set_link_filepaths_status("failed: %s" % msg)
@@ -525,7 +520,7 @@ def _remove_filepath(self, fp, conn_handler, queue):
             raise QiitaDBError(msg)
 
         # Get the filpeath id
-        fp_id = get_filepath_id(self._table, fp, conn_handler)
+        fp_id = get_filepath_id(self._table, fp)
         fp_is_mine = conn_handler.execute_fetchone(
             "SELECT EXISTS(SELECT * FROM qiita.{0} WHERE filepath_id=%s AND "
             "{1}=%s)".format(self._data_filepath_table,
@@ -579,8 +574,7 @@ def clear_filepaths(self):
 
         # Move the files, if they are not used, if you get to this point
         # self.studies should only have one element, thus self.studies[0]
-        move_filepaths_to_upload_folder(self.studies[0], filepaths,
-                                        conn_handler=conn_handler)
+        move_filepaths_to_upload_folder(self.studies[0], filepaths)
 
     def remove_filepath(self, fp):
         """Removes the filepath from the RawData
@@ -614,7 +608,7 @@ def remove_filepath(self, fp):
         self._set_link_filepaths_status("idle")
 
         # Delete the files, if they are not used anywhere
-        purge_filepaths(conn_handler)
+        purge_filepaths()
 
     def status(self, study):
         """The status of the raw data within the given study
@@ -753,7 +747,7 @@ def create(cls, study, preprocessed_params_table, preprocessed_params_id,
             data_type = prep_template.data_type(ret_id=True)
         else:
             # only data_type, so need id from the text
-            data_type = convert_to_id(data_type, "data_type", conn_handler)
+            data_type = convert_to_id(data_type, "data_type")
 
         # Check that the preprocessed_params_table exists
         if not exists_dynamic_table(preprocessed_params_table, "preprocessed_",
@@ -804,7 +798,7 @@ def create(cls, study, preprocessed_params_table, preprocessed_params_id,
         conn_handler.execute_queue(q)
 
         # Add the filepaths to the database and connect them
-        ppd.add_filepaths(filepaths, conn_handler)
+        ppd.add_filepaths(filepaths)
         return ppd
 
     @classmethod
@@ -1278,7 +1272,7 @@ def create(cls, processed_params_table, processed_params_id, filepaths,
                 "You must provide either a preprocessed_data, a "
                 "data_type, or both")
         else:
-            data_type = convert_to_id(data_type, "data_type", conn_handler)
+            data_type = convert_to_id(data_type, "data_type")
 
         # We first check that the processed_params_table exists
         if not exists_dynamic_table(processed_params_table,
@@ -1321,7 +1315,7 @@ def create(cls, processed_params_table, processed_params_id, filepaths,
             "(%s, %s)".format(cls._study_processed_table),
             (study_id, pd_id))
 
-        pd.add_filepaths(filepaths, conn_handler)
+        pd.add_filepaths(filepaths)
         return cls(pd_id)
 
     @classmethod
@@ -1524,8 +1518,7 @@ def status(self, status):
 
         conn_handler = SQLConnectionHandler()
 
-        status_id = convert_to_id(status, 'processed_data_status',
-                                  conn_handler=conn_handler)
+        status_id = convert_to_id(status, 'processed_data_status')
 
         sql = """UPDATE qiita.{0} SET processed_data_status_id = %s
                  WHERE processed_data_id=%s""".format(self._table)
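
The _is_preprocessed change above keeps the same EXISTS query but now owns its connection. Below is a runnable illustration of that query shape, using sqlite3 purely so the example is self-contained and collapsing the diff's prep_template join into a single hypothetical table; Qiita runs the real version on PostgreSQL through SQLConnectionHandler.

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE prep_template_preprocessed_data "
             "(prep_template_id INTEGER, preprocessed_data_id INTEGER)")
conn.execute("INSERT INTO prep_template_preprocessed_data VALUES (1, 10)")

def is_preprocessed(prep_template_id):
    # EXISTS returns 1/0; bool() maps that to the True/False that a
    # caller like _remove_filepath branches on.
    row = conn.execute(
        "SELECT EXISTS(SELECT 1 FROM prep_template_preprocessed_data "
        "WHERE prep_template_id = ?)", (prep_template_id,)).fetchone()
    return bool(row[0])

print(is_preprocessed(1))  # True: preprocessed data exists, files are locked
print(is_preprocessed(2))  # False: safe to remove filepaths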
10 changes: 5 additions & 5 deletions qiita_db/job.py
@@ -110,7 +110,7 @@ def exists(cls, datatype, command, options, analysis,
         """
         conn_handler = SQLConnectionHandler()
         # check passed arguments and grab analyses for matching jobs
-        datatype_id = convert_to_id(datatype, "data_type", conn_handler)
+        datatype_id = convert_to_id(datatype, "data_type")
         sql = "SELECT command_id FROM qiita.command WHERE name = %s"
         command_id = conn_handler.execute_fetchone(sql, (command, ))[0]
         opts_json = params_dict_to_json(options)
@@ -238,7 +238,7 @@ def create(cls, datatype, command, options, analysis,
                 "analysis: %s" % (datatype, command, options, analysis.id))
 
         # Get the datatype and command ids from the strings
-        datatype_id = convert_to_id(datatype, "data_type", conn_handler)
+        datatype_id = convert_to_id(datatype, "data_type")
         sql = "SELECT command_id FROM qiita.command WHERE name = %s"
         command_id = conn_handler.execute_fetchone(sql, (command, ))[0]
         opts_json = params_dict_to_json(options)
@@ -297,7 +297,7 @@ def options(self):
                "job_id = %s)".format(self._table))
         db_comm = conn_handler.execute_fetchone(sql, (self._id, ))
         out_opt = loads(db_comm[1])
-        basedir = get_db_files_base_dir(conn_handler)
+        basedir = get_db_files_base_dir()
         join_f = partial(join, join(basedir, "job"))
         for k in out_opt:
             opts[k] = join_f("%s_%s_%s" % (self._id, db_comm[0], k.strip("-")))
@@ -422,7 +422,7 @@ def add_results(self, results):
         conn_handler = SQLConnectionHandler()
         self._lock_job(conn_handler)
         # convert all file type text to file type ids
-        res_ids = [(fp, convert_to_id(fptype, "filepath_type", conn_handler))
+        res_ids = [(fp, convert_to_id(fptype, "filepath_type"))
                    for fp, fptype in results]
         file_ids = insert_filepaths(res_ids, self._id, self._table,
                                     "filepath", conn_handler, move_files=False)
@@ -485,7 +485,7 @@ def get_commands_by_datatype(cls, datatypes=None):
         conn_handler = SQLConnectionHandler()
         # get the ids of the datatypes to get commands for
         if datatypes is not None:
-            datatype_info = [(convert_to_id(dt, "data_type", conn_handler), dt)
+            datatype_info = [(convert_to_id(dt, "data_type"), dt)
                              for dt in datatypes]
         else:
            datatype_info = conn_handler.execute_fetchall(
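
convert_to_id appears in most hunks of this file; after this PR it opens its own connection, so callers pass only the value and the lookup table name. The stand-in below is hypothetical (the real qiita_db.util.convert_to_id queries qiita.<table> for the matching <table>_id), but it mimics the new call shape with an in-memory table.

# Illustrative only: contents of the lookup table are made up here.
_DATA_TYPE_IDS = {"16S": 1, "18S": 2, "ITS": 3}

def convert_to_id(value, table):
    # Real code would run a SELECT against qiita.<table> through a
    # handler it constructs itself; here a dict plays that role.
    try:
        return _DATA_TYPE_IDS[value]
    except KeyError:
        raise LookupError("%s is not a valid %s" % (value, table))

datatype_id = convert_to_id("16S", "data_type")  # no conn_handler argument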