Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/biocore/qiita into improv…
Browse files Browse the repository at this point in the history
…e-sql-queues
  • Loading branch information
josenavas committed Jun 19, 2015
2 parents 00244b8 + 1f9308e commit ff9f304
Show file tree
Hide file tree
Showing 22 changed files with 686 additions and 129 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ http://qiime.org/documentation/file_formats.html#mapping-file-overview.
* `qiita webserver` has been moved to `qiita pet webserver`
* Cluster names now use dashes instead of underscores (e.g., `qiita_general` is now `qiita-general`)
* `qiita-general` is now used as a default argument to `qiita-env start_cluster` and `qiita-env stop_cluster` if no cluster name is specified
* Qiita now allows processing of already demultiplexed data whose reads do not contain any technical (barcode and primer) section.

Version 0.1.0 (2015-04-30)
--------------------------
Expand Down
61 changes: 61 additions & 0 deletions qiita_db/metadata_template/base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ class MetadataTemplate(QiitaObject):
get
to_file
add_filepath
update
See Also
--------
Expand Down Expand Up @@ -1092,6 +1093,66 @@ def categories(self):

return cols

def update(self, md_template):
    r"""Update values in the template

    The given template is cleaned and validated, compared cell by cell
    against the values currently stored in the database and every column
    holding at least one changed value is rewritten through
    `update_category`. The template files are regenerated afterwards.

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by samples Ids

    Raises
    ------
    QiitaDBError
        If md_template and db do not have the same sample ids
        If md_template and db do not have the same column headers
        If self.can_be_updated is not True
    """
    conn_handler = SQLConnectionHandler()

    # Clean and validate the metadata template given
    new_map = self._clean_validate_template(md_template, self.study_id,
                                            self.columns_restrictions)
    # Retrieving current metadata
    current_map = self._transform_to_dict(conn_handler.execute_fetchall(
        "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))
    current_map = pd.DataFrame.from_dict(current_map, orient='index')

    # simple validations of sample ids and column names. The check is done
    # in both directions: the element-wise comparison below only works on
    # identically labeled frames, so a template that is *missing* samples
    # or columns must be reported here as a QiitaDBError too
    samples_diff = set(new_map.index).symmetric_difference(
        current_map.index)
    if samples_diff:
        raise QiitaDBError('The new template differs from what is stored '
                           'in database by these samples names: %s'
                           % ', '.join(sorted(samples_diff)))
    columns_diff = set(new_map.columns).symmetric_difference(
        current_map.columns)
    if columns_diff:
        raise QiitaDBError('The new template differs from what is stored '
                           'in database by these columns names: %s'
                           % ', '.join(sorted(columns_diff)))

    # here we are comparing two dataframes following:
    # http://stackoverflow.com/a/17095620/4228285
    # Both frames are sorted by row and column labels so they line up;
    # sort_index is used because DataFrame.sort is not available in
    # current pandas releases
    current_map = current_map.sort_index(axis=0).sort_index(axis=1)
    new_map = new_map.sort_index(axis=0).sort_index(axis=1)
    # stack() turns the boolean frame into a (sample, column) indexed
    # series; indexing it with itself keeps only the cells that differ
    map_diff = (current_map != new_map).stack()
    map_diff = map_diff[map_diff]
    map_diff.index.names = ['id', 'column']
    changed_cols = map_diff.index.get_level_values('column').unique()

    if not self.can_be_updated(columns=set(changed_cols)):
        raise QiitaDBError('The new template is modifying fields that '
                           'cannot be modified. Try removing the target '
                           'gene fields or deleting the processed data. '
                           'You are trying to modify: %s'
                           % ', '.join(changed_cols))

    # Each changed column is rewritten as a whole
    for col in changed_cols:
        self.update_category(col, new_map[col].to_dict())

    self.generate_files()

def update_category(self, category, samples_and_values):
"""Update an existing column
Expand Down
50 changes: 50 additions & 0 deletions qiita_db/metadata_template/prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from __future__ import division
from future.utils import viewvalues
from itertools import chain
from os.path import join
from time import strftime
from copy import deepcopy
Expand Down Expand Up @@ -288,6 +289,55 @@ def data_type(self, ret_id=False):
"qiita.prep_template p ON p.data_type_id = d.data_type_id WHERE "
"p.prep_template_id=%s".format(ret), (self.id,))[0]

@property
def columns_restrictions(self):
    """Gets the dictionary of columns required based on data_type

    Returns
    -------
    dict
        The dict of restrictions based on the data_type
    """
    # Work on a copy so the module level definition is never modified
    restrictions = deepcopy(PREP_TEMPLATE_COLUMNS)
    is_target_gene = self.data_type() in TARGET_GENE_DATA_TYPES
    if is_target_gene:
        # target gene data types have additional column restrictions
        restrictions.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)
    return restrictions

def can_be_updated(self, columns):
    """Gets if the template can be updated

    Parameters
    ----------
    columns : set
        A set of the names of the columns to be updated

    Returns
    -------
    bool
        If the template can be updated

    Notes
    -----
    The prep template can be updated when (1) it has no preprocessed data
    or the prep template data-type is not part of TARGET_GENE_DATA_TYPES,
    (2) if it is part of TARGET_GENE_DATA_TYPES then it can only be
    updated if none of the columns being updated are part of
    PREP_TEMPLATE_COLUMNS_TARGET_GENE
    """
    # Case (1): nothing has been preprocessed yet ...
    if not self.preprocessed_data:
        return True
    # ... or the data type has no target gene restrictions
    if self.data_type() not in TARGET_GENE_DATA_TYPES:
        return True

    # Case (2): gather every column named by the target gene restrictions
    restricted = set()
    for restriction in viewvalues(PREP_TEMPLATE_COLUMNS_TARGET_GENE):
        restricted.update(restriction.columns)

    # Updatable only if no restricted column is being touched
    return not (columns & restricted)

@property
def raw_data(self):
conn_handler = SQLConnectionHandler()
Expand Down
100 changes: 35 additions & 65 deletions qiita_db/metadata_template/sample_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
from os.path import join
from time import strftime

import pandas as pd

from qiita_core.exceptions import IncompetentQiitaDeveloperError
from qiita_db.exceptions import (QiitaDBDuplicateError, QiitaDBError,
QiitaDBUnknownIDError)
Expand Down Expand Up @@ -189,6 +187,41 @@ def study_id(self):
"""
return self._id

@property
def columns_restrictions(self):
    """Gets the dictionary of columns required

    Returns
    -------
    dict
        The dict of restrictions (SAMPLE_TEMPLATE_COLUMNS)
    """
    return SAMPLE_TEMPLATE_COLUMNS

def can_be_updated(self, **kwargs):
    """Gets if the template can be updated

    Parameters
    ----------
    kwargs : ignored
        Necessary to have in parameters to support other objects.

    Returns
    -------
    bool
        As this is the sample template, it will always return True. See the
        notes.

    Notes
    -----
    The prep template can't be updated in certain situations, see its
    documentation for more info. However, the sample template doesn't have
    those restrictions. Thus, to be able to use the same update code in
    the base class, we need to have this method and it should always
    return True.
    """
    return True

def generate_files(self):
r"""Generates all the files that contain data from this template
"""
Expand Down Expand Up @@ -226,66 +259,3 @@ def extend(self, md_template):
conn_handler.execute_queue(queue_name)

self.generate_files()

def update(self, md_template):
    r"""Update values in the sample template

    The given template is cleaned and validated, compared cell by cell
    against the values stored in the database and every column holding at
    least one changed value is rewritten through `update_category`. The
    template files are regenerated afterwards.

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by samples Ids

    Raises
    ------
    QiitaDBError
        If md_template and db do not have the same sample ids
        If md_template and db do not have the same column headers

    Notes
    -----
    The explicit checks only detect samples/columns that are present in
    `md_template` but not in the database, not the other way around.
    """
    conn_handler = SQLConnectionHandler()

    # Clean and validate the metadata template given
    new_map = self._clean_validate_template(md_template, self.id,
                                            SAMPLE_TEMPLATE_COLUMNS)
    # Retrieving current metadata: first the rows of the shared table that
    # belong to this template ...
    current_map = self._transform_to_dict(conn_handler.execute_fetchall(
        "SELECT * FROM qiita.{0} WHERE {1}=%s".format(self._table,
                                                      self._id_column),
        (self.id,)))
    # ... and then the contents of the template specific (dynamic) table
    dyn_vals = self._transform_to_dict(conn_handler.execute_fetchall(
        "SELECT * FROM qiita.{0}".format(self._table_name(self.id))))

    # Merge both sources per key and drop the 'study_id' entry, if any
    # (presumably because it is not a column of the metadata template;
    # verify against _clean_validate_template)
    for k in current_map:
        current_map[k].update(dyn_vals[k])
        current_map[k].pop('study_id', None)

    # converting sql results to dataframe
    current_map = pd.DataFrame.from_dict(current_map, orient='index')

    # simple validations of sample ids and column names
    samples_diff = set(
        new_map.index.tolist()) - set(current_map.index.tolist())
    if samples_diff:
        raise QiitaDBError('The new sample template differs from what is '
                           'stored in database by these samples names: %s'
                           % ', '.join(samples_diff))
    columns_diff = set(new_map.columns) - set(current_map.columns)
    if columns_diff:
        raise QiitaDBError('The new sample template differs from what is '
                           'stored in database by these columns names: %s'
                           % ', '.join(columns_diff))

    # here we are comparing two dataframes following:
    # http://stackoverflow.com/a/17095620/4228285
    # Both frames are sorted along both axes so their labels line up
    current_map.sort(axis=0, inplace=True)
    current_map.sort(axis=1, inplace=True)
    new_map.sort(axis=0, inplace=True)
    new_map.sort(axis=1, inplace=True)
    # stack() yields a (sample, column) indexed boolean series; indexing it
    # with itself keeps only the cells that differ
    map_diff = (current_map != new_map).stack()
    map_diff = map_diff[map_diff]
    map_diff.index.names = ['id', 'column']
    changed_cols = map_diff.index.get_level_values('column').unique()

    # Each changed column is rewritten as a whole
    for col in changed_cols:
        self.update_category(col, new_map[col].to_dict())

    self.generate_files()
21 changes: 21 additions & 0 deletions qiita_db/metadata_template/test/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from os import close, remove
from os.path import join
from collections import Iterable
from copy import deepcopy

import numpy.testing as npt
import pandas as pd
Expand Down Expand Up @@ -255,6 +256,19 @@ def test_get_none(self):
"""get returns none if the sample id is not present"""
self.assertTrue(self.tester.get('Not_a_Category') is None)

def test_columns_restrictions(self):
    """columns_restrictions returns the prep template columns plus the
    target gene ones"""
    expected = deepcopy(PREP_TEMPLATE_COLUMNS)
    expected.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)
    observed = self.prep_template.columns_restrictions
    self.assertEqual(observed, expected)

def test_can_be_updated(self):
    """can_be_updated depends on the columns being modified"""
    pt = self.prep_template
    # restricted columns can't be updated in a prep template with data
    restricted = pt.can_be_updated({'barcode'})
    self.assertFalse(restricted)
    # columns that are not restricted can be updated
    unrestricted = pt.can_be_updated({'center_name'})
    self.assertTrue(unrestricted)


@qiita_test_checker()
class TestPrepSampleReadWrite(BaseTestPrepSample):
Expand Down Expand Up @@ -1343,6 +1357,13 @@ def test_raw_data_setter(self):
pt.raw_data = rd
self.assertEqual(pt.raw_data, rd.id)

def test_can_be_updated_on_new(self):
    """A newly created prep template can be updated"""
    new_pt = PrepTemplate.create(self.metadata, self.test_study,
                                 self.data_type)
    # even the barcode column is updatable on a freshly created template
    updatable = new_pt.can_be_updated({'barcode'})
    self.assertTrue(updatable)


EXP_PREP_TEMPLATE = (
'sample_name\tbarcode\tcenter_name\tcenter_project_name\t'
Expand Down
9 changes: 9 additions & 0 deletions qiita_db/metadata_template/test/test_sample_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,15 @@ def test_get_none(self):
"""get returns none if the sample id is not present"""
self.assertTrue(self.tester.get('Not_a_Category') is None)

def test_columns_restrictions(self):
    """columns_restrictions returns SAMPLE_TEMPLATE_COLUMNS"""
    observed = self.sample_template.columns_restrictions
    self.assertEqual(observed, SAMPLE_TEMPLATE_COLUMNS)

def test_can_be_updated(self):
    """The sample template can always be updated"""
    # can_be_updated is a regular method, so it has to be called: passing
    # the bound method itself to assertTrue is always truthy and would
    # make this test pass regardless of what the method returns
    self.assertTrue(self.sample_template.can_be_updated())


@qiita_test_checker()
class TestSampleReadWrite(BaseTestSample):
Expand Down
15 changes: 15 additions & 0 deletions qiita_db/support_files/patches/26.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Jun 11, 2015

-- Updating FASTA-Sanger -> FASTA_Sanger, needed so we can put restrictions on
-- what kind of files the user can select in the GUI
UPDATE qiita.filetype SET type='FASTA_Sanger' WHERE type = 'FASTA-Sanger';

-- Adding new filetype
INSERT INTO qiita.filetype (type) VALUES ('per_sample_FASTQ');


-- Adding new illumina processing params if they do not exist
-- adapted from: http://stackoverflow.com/a/13902402
-- The SELECT DISTINCT produces a single row only when no row with
-- barcode_type = 'not-barcoded' is present. Note that, as the row is selected
-- FROM the table itself, nothing is inserted if the table is empty.
INSERT INTO qiita.preprocessed_sequence_illumina_params (param_set_name, barcode_type)
SELECT DISTINCT 'per sample FASTQ defaults', 'not-barcoded' FROM qiita.preprocessed_sequence_illumina_params
WHERE NOT EXISTS (SELECT 1 FROM qiita.preprocessed_sequence_illumina_params WHERE barcode_type = 'not-barcoded');
2 changes: 1 addition & 1 deletion qiita_db/test/test_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_preprocessed_filepath(self):

def test_preprocessed_sequence_illumina_params(self):
self.assertEqual(
get_count("qiita.preprocessed_sequence_illumina_params"), 6)
get_count("qiita.preprocessed_sequence_illumina_params"), 7)

def test_processed_data(self):
self.assertEqual(get_count("qiita.processed_data"), 1)
Expand Down
6 changes: 5 additions & 1 deletion qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ def test_exists_dynamic_table(self):
def test_convert_to_id(self):
    """convert_to_id returns the id matching the given value"""
    # text column defaults to the table name
    obs = convert_to_id("directory", "filepath_type")
    self.assertEqual(obs, 8)
    # text column given explicitly
    obs = convert_to_id("running", "analysis_status", "status")
    self.assertEqual(obs, 3)
    obs = convert_to_id("EMP", "portal_type", "portal")
    self.assertEqual(obs, 2)

def test_convert_to_id_bad_value(self):
"""Tests that ids are returned correctly"""
Expand All @@ -191,7 +194,8 @@ def test_get_filetypes(self):
"""Tests that get_filetypes works with valid arguments"""

obs = get_filetypes()
exp = {'SFF': 1, 'FASTA-Sanger': 2, 'FASTQ': 3, 'FASTA': 4}
exp = {'SFF': 1, 'FASTA_Sanger': 2, 'FASTQ': 3, 'FASTA': 4,
'per_sample_FASTQ': 5}
self.assertEqual(obs, exp)

obs = get_filetypes(key='filetype_id')
Expand Down
7 changes: 5 additions & 2 deletions qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,7 @@ def filepath_ids_to_rel_paths(filepath_ids):
return {}


def convert_to_id(value, table):
def convert_to_id(value, table, text_col=None):
"""Converts a string value to its corresponding table identifier
Parameters
Expand All @@ -818,6 +818,8 @@ def convert_to_id(value, table):
The string value to convert
table : str
The table that has the conversion
text_col : str, optional
Column holding the string value. Defaults to same as table name.
Returns
-------
Expand All @@ -829,8 +831,9 @@ def convert_to_id(value, table):
IncompetentQiitaDeveloperError
The passed string has no associated id
"""
text_col = table if text_col is None else text_col
conn_handler = SQLConnectionHandler()
sql = "SELECT {0}_id FROM qiita.{0} WHERE {0} = %s".format(table)
sql = "SELECT {0}_id FROM qiita.{0} WHERE {1} = %s".format(table, text_col)
_id = conn_handler.execute_fetchone(sql, (value, ))
if _id is None:
raise IncompetentQiitaDeveloperError("%s not valid for table %s"
Expand Down
Loading

0 comments on commit ff9f304

Please sign in to comment.