From 54c6f1498a0cba52bc0efd7ca906efbb6f8929fe Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 17 Sep 2015 11:10:31 -0700 Subject: [PATCH 01/12] Fixing minimal mapping generation --- qiita_ware/processing_pipeline.py | 38 +++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/qiita_ware/processing_pipeline.py b/qiita_ware/processing_pipeline.py index 167dbb93e..6f4caa8ec 100644 --- a/qiita_ware/processing_pipeline.py +++ b/qiita_ware/processing_pipeline.py @@ -24,8 +24,9 @@ def _get_qiime_minimal_mapping(prep_template, out_dir): """Generates a minimal QIIME-compliant mapping file for split libraries The columns of the generated file are, in order: SampleID, BarcodeSequence, - LinkerPrimerSequence, Description. All values are taken from the prep - template except for Description, which always receive the value "Qiita MMF" + LinkerPrimerSequence, [ReverseLinkerPrimer] Description. All values are + taken from the prep template except for Description, which always receive + the value "Qiita MMF" Parameters ---------- @@ -40,42 +41,45 @@ def _get_qiime_minimal_mapping(prep_template, out_dir): The paths to the qiime minimal mapping files """ from functools import partial - from collections import defaultdict from os.path import join import pandas as pd - # The prep templates has a QIIME mapping file, get it - qiime_map = pd.read_csv(prep_template.qiime_map_fp, sep='\t', - keep_default_na=False, na_values=['unknown'], - index_col=False, - converters=defaultdict(lambda: str)) - qiime_map.set_index('#SampleID', inplace=True, drop=True) + pt_df = prep_template.to_dataframe() - # We use our own description to avoid potential processing problems - qiime_map['Description'] = pd.Series(['Qiita MMF'] * len(qiime_map.index), - index=qiime_map.index) + rename_cols = { + 'barcode': 'BarcodeSequence', + 'primer': 'LinkerPrimerSequence', + } - # We ensure the order of the columns as QIIME is expecting - if 'ReverseLinkerPrimer' in qiime_map: + # Ensure the order of the columns as QIIME is expecting + if 'reverselinkerprimer' in pt_df: + rename_cols['reverselinkerprimer'] = 'ReverseLinkerPrimer' cols = ['BarcodeSequence', 'LinkerPrimerSequence', 'ReverseLinkerPrimer', 'Description'] else: cols = ['BarcodeSequence', 'LinkerPrimerSequence', 'Description'] + pt_df.rename(columns=rename_cols, inplace=True) + + # Sometimes, the Description column can generate some problems in QIIME, + # depending on its values. We set it up to read Qiita MMF for all rows + pt_df['Description'] = pd.Series(['Qiita MMF'] * len(pt_df.index), + index=pt_df.index) + path_builder = partial(join, out_dir) - if 'run_prefix' in qiime_map: + if 'run_prefix' in pt_df: # The study potentially has more than 1 lane, so we should generate a # qiime MMF for each of the lanes. 
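As an aside, the approach this patch takes to building the minimal mapping file can be illustrated with a small, self-contained pandas sketch. The DataFrame below merely stands in for prep_template.to_dataframe(); the sample ids, run_prefix values, and output file names are made up for illustration, while the renamed columns and the fixed "Qiita MMF" description mirror what the patch does.

import pandas as pd

# Stand-in for prep_template.to_dataframe(); values are kept as strings so
# id-like entries such as "1.10" are never coerced to numbers.
pt_df = pd.DataFrame(
    {'barcode': ['GTCCGCAAGTTA', 'CGTAGAGCTCTC'],
     'primer': ['GTGCCAGCMGCCGCGGTAA', 'GTGCCAGCMGCCGCGGTAA'],
     'run_prefix': ['s_G1_L001', 's_G1_L002']},          # made-up prefixes
    index=['1.SKB8.640193', '1.SKD8.640184'])

# Rename to the QIIME column names and force a constant Description
pt_df = pt_df.rename(columns={'barcode': 'BarcodeSequence',
                              'primer': 'LinkerPrimerSequence'})
pt_df['Description'] = 'Qiita MMF'
cols = ['BarcodeSequence', 'LinkerPrimerSequence', 'Description']

# One minimal mapping file per run_prefix value (i.e. per lane)
for prefix, df in pt_df.groupby('run_prefix'):
    df[cols].to_csv('%s_MMF.txt' % prefix, sep='\t', index_label='#SampleID')
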
We know how to split the prep # template based on the run_prefix column output_fps = [] - for prefix, df in qiime_map.groupby('run_prefix'): + for prefix, df in pt_df.groupby('run_prefix'): df = df[cols] out_fp = path_builder("%s_MMF.txt" % prefix) output_fps.append(out_fp) df.to_csv(out_fp, index_label="#SampleID", sep='\t') else: # The study only has one lane, just write the MMF - df = qiime_map[cols] + df = pt_df[cols] out_fp = path_builder("prep_%d_MMF.txt" % prep_template.id) output_fps = [out_fp] df.to_csv(out_fp, index_label="#SampleID", sep='\t') From 76604a4e5173362e8c39a68bbac69b0db0af14ec Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 17 Sep 2015 15:20:00 -0700 Subject: [PATCH 02/12] Addressing @mortonjt comment --- qiita_ware/processing_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiita_ware/processing_pipeline.py b/qiita_ware/processing_pipeline.py index 6f4caa8ec..e3835c4bf 100644 --- a/qiita_ware/processing_pipeline.py +++ b/qiita_ware/processing_pipeline.py @@ -30,7 +30,7 @@ def _get_qiime_minimal_mapping(prep_template, out_dir): Parameters ---------- - prep_template : PrepTemplate + prep_template : qiita_db.metadata_template.PrepTemplate The prep template from which we need to generate the minimal mapping out_dir : str Path to the output directory From eae80aa38f6b4035abd82b95dcb90f3f7e7fc99d Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 17 Sep 2015 15:20:13 -0700 Subject: [PATCH 03/12] Adding test --- qiita_ware/test/test_processing_pipeline.py | 53 +++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/qiita_ware/test/test_processing_pipeline.py b/qiita_ware/test/test_processing_pipeline.py index 2b432121a..be72745db 100644 --- a/qiita_ware/test/test_processing_pipeline.py +++ b/qiita_ware/test/test_processing_pipeline.py @@ -14,6 +14,7 @@ from functools import partial from shutil import rmtree +import numpy.testing as npt import pandas as pd import gzip @@ -23,12 +24,13 @@ from qiita_db.sql_connection import SQLConnectionHandler from qiita_db.data import RawData, PreprocessedData -from qiita_db.study import Study +from qiita_db.study import Study, StudyPerson +from qiita_db.user import User from qiita_db.parameters import (PreprocessedIlluminaParams, ProcessedSortmernaParams, Preprocessed454Params) - -from qiita_db.metadata_template import PrepTemplate +from qiita_db.exceptions import QiitaDBWarning +from qiita_db.metadata_template import PrepTemplate, SampleTemplate from qiita_ware.processing_pipeline import (_get_preprocess_fastq_cmd, _get_preprocess_fasta_cmd, _insert_preprocessed_data, @@ -192,6 +194,45 @@ def test_get_qiime_minimal_mapping_single_reverse_primer(self): with open(exp_fps[0], "U") as f: self.assertEqual(f.read(), EXP_PREP_RLP) + def test_get_qiime_minimal_mapping_numeric_sample_ids(self): + # Get minimal mapping file works correctly with numeric sample ids. 
A + # bug was found that samples of the type .[0-9]*0 where + # truncated to .[0-9]* + info = {"timeseries_type_id": 1, + "metadata_complete": True, + "mixs_compliant": True, + "number_samples_collected": 25, + "number_samples_promised": 28, + "study_alias": "testing", + "study_description": "Test description", + "study_abstract": "Test abstract", + "emp_person_id": StudyPerson(2), + "principal_investigator_id": StudyPerson(3), + "lab_person_id": StudyPerson(1) + } + new_study = Study.create(User('test@foo.bar'), "Test study", [1], info) + metadata_dict = {'1': {'host_subject_id': 'NotIdentified'}, + '10': {'host_subject_id': 'NotIdentified'}} + metadata = pd.DataFrame.from_dict(metadata_dict, orient='index') + npt.assert_warns(QiitaDBWarning, SampleTemplate.create, metadata, + new_study) + metadata_dict = {'1': {'str_column': 'Value for sample 1', + 'primer': 'GTGCCAGCMGCCGCGGTAA', + 'barcode': 'GTCCGCAAGTTA'}, + '10': {'str_column': 'Value for sample 1', + 'primer': 'GTGCCAGCMGCCGCGGTAA', + 'barcode': 'CGTAGAGCTCTC'}} + metadata = pd.DataFrame.from_dict(metadata_dict, orient='index') + pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata, + new_study, 2) + out_dir = mkdtemp() + obs_fps = _get_qiime_minimal_mapping(pt, out_dir) + exp_fps = [join(out_dir, 'prep_%s_MMF.txt' % pt.id)] + self.assertEqual(obs_fps, exp_fps) + self.assertTrue(exists(exp_fps[0])) + with open(exp_fps[0], 'U') as f: + self.assertEqual(f.read(), EXP_PREP_NUM.format(new_study.id)) + def test_get_qiime_minimal_mapping_multiple(self): # We need to create a prep template in which we have different run # prefix values, so we can test this case @@ -771,6 +812,12 @@ def test_insert_processed_data_target_gene(self): "1.SKM8.640201\tCCGATGCCTTGA\tGTGCCAGCMGCCGCGGTAA\tGTGCCAGCM\tQiita MMF\n" "1.SKM9.640192\tAGCAGGCACGAA\tGTGCCAGCMGCCGCGGTAA\tGTGCCAGCM\tQiita MMF\n") +EXP_PREP_NUM = ( + "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tDescription\n" + "{0}.1\tGTCCGCAAGTTA\tGTGCCAGCMGCCGCGGTAA\tQiita MMF\n" + "{0}.10\tCGTAGAGCTCTC\tGTGCCAGCMGCCGCGGTAA\tQiita MMF\n" +) + EXP_PREP_1 = ( "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tDescription\n" "1.SKB8.640193\tGTCCGCAAGTTA\tGTGCCAGCMGCCGCGGTAA\tQiita MMF\n" From 28eafdadd06c71f74fcf3ed3aef71c2df8d60034 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Mon, 28 Sep 2015 10:05:46 -0700 Subject: [PATCH 04/12] Add command to update the raw data --- qiita_db/commands.py | 57 +++++++++++++++++++++++++++++++++++++++++++- scripts/qiita | 7 ++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index 4004ae12d..b062cb2cf 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -307,6 +307,60 @@ def load_parameters_from_cmd(name, fp, table): return constructor.create(name, **params) +def update_raw_data_from_cmd(filepaths, filepath_types, study_id, rd_id=None): + """Updates the raw data of the study 'study_id' + + Parameters + ---------- + filepaths : iterable of str + Paths to the raw data files + filepath_types : iterable of str + Describes the contents of the files + study_id : int + The study_id of the study to be updated + rd_id : int, optional + The id of the raw data to be updated. 
If not provided, the raw data + with lowest id in the study will be updated + + Returns + ------- + qiita_db.data.RawData + + Raises + ------ + ValueError + If 'filepaths' and 'filepath_types' do not have the same length + If the study does not have any raw data + If rd_id is provided and it does not belong to the given study + """ + if len(filepaths) != len(filepath_types): + raise ValueError("Please provide exactly one filepath_type for each" + "and every filepath") + with TRN: + study = Study(study_id) + raw_data_ids = study.raw_data() + if not raw_data_ids: + raise ValueError("Study %s does not have any raw data" % study_id) + + if rd_id: + if rd_id not in raw_data_ids: + raise ValueError( + "The raw data %d does not exist in the study %d. Available" + " raw data: %s" + % (rd_id, study_id, ', '.join(map(str, raw_data_ids)))) + raw_data = RawData(rd_id) + else: + raw_data = RawData(sorted(raw_data_ids)[0]) + + filepath_types_dict = get_filepath_types() + filepath_types = [filepath_types_dict[x] for x in filepath_types] + + raw_data.clear_filepaths() + raw_data.add_filepaths(list(zip(filepaths, filepath_types))) + + return raw_data + + def update_preprocessed_data_from_cmd(sl_out_dir, study_id, ppd_id=None): """Updates the preprocessed data of the study 'study_id' @@ -351,7 +405,8 @@ def update_preprocessed_data_from_cmd(sl_out_dir, study_id, ppd_id=None): study = Study(study_id) ppds = study.preprocessed_data() if not ppds: - raise ValueError("Study %s does not have any preprocessed data") + raise ValueError("Study %s does not have any preprocessed data", + study_id) if ppd_id: if ppd_id not in ppds: diff --git a/scripts/qiita b/scripts/qiita index 2c0317da2..5f4641f71 100755 --- a/scripts/qiita +++ b/scripts/qiita @@ -294,6 +294,13 @@ def update_preprocessed_data(sl_out_dir, study, preprocessed_data): preprocessed_data) click.echo("Preprocessed data %s successfully updated" % ppd.id) + +@db.command() +def update_raw_data(): + """""" + rd = update_raw_data_from_cmd() + click.echo("Raw data %s successfully updated" % rd.id) + # ############################################################################# # PORTAL COMMANDS # ############################################################################# From 14f75256700c9c6b658e34412008a58a2c65cc66 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 11:37:12 -0700 Subject: [PATCH 05/12] Fixing bug in the command --- qiita_db/commands.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index b062cb2cf..0533305ff 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -18,7 +18,7 @@ from .study import Study, StudyPerson from .user import User from .util import (get_filetypes, get_filepath_types, compute_checksum, - convert_to_id) + convert_to_id, move_filepaths_to_upload_folder) from .data import RawData, PreprocessedData, ProcessedData from .metadata_template import (SampleTemplate, PrepTemplate, load_template_to_dataframe) @@ -355,7 +355,12 @@ def update_raw_data_from_cmd(filepaths, filepath_types, study_id, rd_id=None): filepath_types_dict = get_filepath_types() filepath_types = [filepath_types_dict[x] for x in filepath_types] - raw_data.clear_filepaths() + fps = raw_data.get_filepaths() + sql = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s" + TRN.add(sql, [raw_data.id]) + TRN.execute() + move_filepaths_to_upload_folder(study_id, fps) + raw_data.add_filepaths(list(zip(filepaths, filepath_types))) return raw_data From 
42f1e3255132c81c50325fd72169d059487c8500 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 11:37:26 -0700 Subject: [PATCH 06/12] Adding tests --- qiita_db/test/test_commands.py | 103 ++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 2 deletions(-) diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py index 4d161ff07..0fdaa42e5 100644 --- a/qiita_db/test/test_commands.py +++ b/qiita_db/test/test_commands.py @@ -14,6 +14,7 @@ from future.utils.six import StringIO from future import standard_library from functools import partial +from operator import itemgetter import pandas as pd @@ -23,13 +24,14 @@ load_processed_data_cmd, load_preprocessed_data_from_cmd, load_parameters_from_cmd, + update_raw_data_from_cmd, update_preprocessed_data_from_cmd) from qiita_db.environment_manager import patch from qiita_db.study import Study, StudyPerson from qiita_db.user import User -from qiita_db.data import PreprocessedData +from qiita_db.data import PreprocessedData, RawData from qiita_db.util import (get_count, check_count, get_db_files_base_dir, - get_mountpoint) + get_mountpoint, compute_checksum) from qiita_db.metadata_template import PrepTemplate from qiita_core.util import qiita_test_checker from qiita_ware.processing_pipeline import generate_demux_file @@ -452,6 +454,103 @@ def test_python_patch(self): self._assert_current_patch('10.sql') +@qiita_test_checker() +class TestUpdateRawDataFromCmd(TestCase): + def setUp(self): + fd, seqs_fp = mkstemp(suffix='_seqs.fastq') + close(fd) + fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq') + close(fd) + self.filepaths = [seqs_fp, barcodes_fp] + self.checksums = [] + for fp in sorted(self.filepaths): + with open(fp, 'w') as f: + f.write("%s\n" % fp) + self.checksums.append(compute_checksum(fp)) + self.filepaths_types = ["raw_forward_seqs", "raw_barcodes"] + self._clean_up_files = [seqs_fp, barcodes_fp] + + info = { + "timeseries_type_id": 1, + "metadata_complete": True, + "mixs_compliant": True, + "number_samples_collected": 25, + "number_samples_promised": 28, + "study_alias": "FCM", + "study_description": "Microbiome of people who eat nothing but " + "fried chicken", + "study_abstract": "Exploring how a high fat diet changes the " + "gut microbiome", + "emp_person_id": StudyPerson(2), + "principal_investigator_id": StudyPerson(3), + "lab_person_id": StudyPerson(1) + } + self.new_study = Study.create(User("test@foo.bar"), + "Update raw data test", + efo=[1], info=info) + self.study = Study(1) + # The files for the RawData object attached to study 1 does not exist. 
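A brief aside on how the new test verifies the update: it records a checksum per input file up front and compares checksums again after the files have been attached to the raw data. The patch uses qiita_db.util.compute_checksum; the sketch below assumes an MD5-style digest only to show the shape of that before/after check, and the file names are placeholders.

import hashlib

def md5_checksum(path):
    # Stand-in for qiita_db.util.compute_checksum: any stable digest of the
    # file contents works for a before/after comparison.
    with open(path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()

# Placeholder input files, mirroring the seqs/barcodes pair in the test
inputs = ['example_seqs.fastq', 'example_barcodes.fastq']
for fp in inputs:
    with open(fp, 'w') as f:
        f.write('%s\n' % fp)

expected = {fp: md5_checksum(fp) for fp in inputs}
# After the update runs, recompute the digests on the files now attached to
# the raw data and assert each one still equals the entry in `expected`.
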
+ # Create them so we can actually perform the tests + for _, fp, _ in RawData(1).get_filepaths(): + with open(fp, 'w') as f: + f.write('\n') + self._clean_up_files.append(fp) + + def tearDown(self): + for f in self._clean_up_files: + if exists(f): + remove(f) + + def test_update_raw_data_from_cmd_diff_length(self): + with self.assertRaises(ValueError): + update_raw_data_from_cmd(self.filepaths[1:], self.filepaths_types, + self.study.id) + with self.assertRaises(ValueError): + update_raw_data_from_cmd(self.filepaths, self.filepaths_types[1:], + self.study.id) + + def test_update_raw_data_from_cmd_no_raw_data(self): + with self.assertRaises(ValueError): + update_raw_data_from_cmd(self.filepaths, self.filepaths_types, + self.new_study.id) + + def test_update_raw_data_from_cmd_wrong_raw_data_id(self): + # Using max(raw_data_ids) + 1 to make sure that the raw data id + # passed does not belong to the study + with self.assertRaises(ValueError): + update_raw_data_from_cmd(self.filepaths, self.filepaths_types, + self.study.id, + max(self.study.raw_data()) + 1) + + def test_update_raw_data_from_cmd(self): + rd = update_raw_data_from_cmd(self.filepaths, self.filepaths_types, + self.study.id) + # Make sure that we are cleaning the environment + for _, fp, _ in rd.get_filepaths(): + self._clean_up_files.append(fp) + + # The checkums are in filepath order. If we sort the rd.get_filepath() + # result by the filepath (itemgetter(1)) we will get them in the same + # order, so the checksums will not fail + for obs, exp in zip(sorted(rd.get_filepaths(), key=itemgetter(1)), + self.checksums): + self.assertEqual(compute_checksum(obs[1]), exp) + + def test_update_raw_data_from_cmd_rd_id(self): + rd = update_raw_data_from_cmd(self.filepaths, self.filepaths_types, + self.study.id, self.study.raw_data()[0]) + # Make sure that we are cleaning the environment + for _, fp, _ in rd.get_filepaths(): + self._clean_up_files.append(fp) + + # The checkums are in filepath order. If we sort the rd.get_filepath() + # result by the filepath (itemgetter(1)) we will get them in the same + # order, so the checksums will not fail + for obs, exp in zip(sorted(rd.get_filepaths(), key=itemgetter(1)), + self.checksums): + self.assertEqual(compute_checksum(obs[1]), exp) + + @qiita_test_checker() class TestUpdatePreprocessedDataFromCmd(TestCase): def setUp(self): From 47f9286e9be716be610dd9865619aa8d8fc499f6 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 11:42:52 -0700 Subject: [PATCH 07/12] Finishing up CLI --- scripts/qiita | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/scripts/qiita b/scripts/qiita index 5f4641f71..aefd5f0fe 100755 --- a/scripts/qiita +++ b/scripts/qiita @@ -30,6 +30,7 @@ from qiita_db.commands import (load_sample_template_from_cmd, load_preprocessed_data_from_cmd, load_prep_template_from_cmd, load_parameters_from_cmd, SUPPORTED_PARAMS, + update_raw_data_from_cmd, update_preprocessed_data_from_cmd) from qiita_db.portal import Portal from qiita_db.sql_connection import SQLConnectionHandler @@ -278,6 +279,25 @@ def load_parameters(fp, table, name): % (table, param.id)) +@db.command() +@click.option('--fp', required=True, type=click.Path(resolve_path=True, + readable=True, exists=True), multiple=True, + help='Path to the raw data file. This option can be used ' + 'multiple times if there are multiple raw data files.') +@click.option('--fp_type', required=True, multiple=True, help='Describes the ' + 'contents of the file. 
Pass one fp_type per fp.', + type=click.Choice(get_filepath_types().keys())) +@click.option('--study', required=True, type=int, + help='Study whose raw data will be updated') +@click.option('--raw_data', required=False, type=int, + help='Raw data to be updated. If not passed, the raw data with ' + 'lowest id in the study will be updated.') +def update_raw_data(fp, fp_type, study, raw_data): + """Updates the raw data with the provided raw data files""" + rd = update_raw_data_from_cmd(fp, fp_type, study, rd_id=raw_data) + click.echo("Raw data %s successfully updated" % rd.id) + + @db.command() @click.argument('sl_out_dir', required=True, type=click.Path(resolve_path=True, readable=True, exists=True, @@ -294,13 +314,6 @@ def update_preprocessed_data(sl_out_dir, study, preprocessed_data): preprocessed_data) click.echo("Preprocessed data %s successfully updated" % ppd.id) - -@db.command() -def update_raw_data(): - """""" - rd = update_raw_data_from_cmd() - click.echo("Raw data %s successfully updated" % rd.id) - # ############################################################################# # PORTAL COMMANDS # ############################################################################# From c7a7ad46bcfff90812c55678280286453dada5a0 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 15:34:54 -0700 Subject: [PATCH 08/12] Addressing comments --- qiita_db/commands.py | 4 ++-- scripts/qiita | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index 0533305ff..96199827e 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -334,13 +334,13 @@ def update_raw_data_from_cmd(filepaths, filepath_types, study_id, rd_id=None): If rd_id is provided and it does not belong to the given study """ if len(filepaths) != len(filepath_types): - raise ValueError("Please provide exactly one filepath_type for each" + raise ValueError("Please provide exactly one filepath_type for each " "and every filepath") with TRN: study = Study(study_id) raw_data_ids = study.raw_data() if not raw_data_ids: - raise ValueError("Study %s does not have any raw data" % study_id) + raise ValueError("Study %d does not have any raw data" % study_id) if rd_id: if rd_id not in raw_data_ids: diff --git a/scripts/qiita b/scripts/qiita index aefd5f0fe..5e2af44f4 100755 --- a/scripts/qiita +++ b/scripts/qiita @@ -287,9 +287,9 @@ def load_parameters(fp, table, name): @click.option('--fp_type', required=True, multiple=True, help='Describes the ' 'contents of the file. Pass one fp_type per fp.', type=click.Choice(get_filepath_types().keys())) -@click.option('--study', required=True, type=int, +@click.option('--study', required=True, type=click.IntRange(1), help='Study whose raw data will be updated') -@click.option('--raw_data', required=False, type=int, +@click.option('--raw_data', required=False, type=click.IntRange(1), help='Raw data to be updated. 
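To make the intended invocation concrete, the same operation the new CLI command wraps is available from Python through update_raw_data_from_cmd, roughly as sketched below. The file names are placeholders; the filepath types shown (raw_forward_seqs, raw_barcodes) are the ones exercised by the tests, and on the command line each file would instead be passed with --fp and its type with --fp_type, together with --study and optionally --raw_data.

from qiita_db.commands import update_raw_data_from_cmd

# Placeholder paths to already-uploaded files; one filepath type per filepath
rd = update_raw_data_from_cmd(
    ['seqs.fastq', 'barcodes.fastq'],
    ['raw_forward_seqs', 'raw_barcodes'],
    study_id=1)  # rd_id omitted: the study's lowest raw data id is updated
print("Raw data %s successfully updated" % rd.id)
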
If not passed, the raw data with ' 'lowest id in the study will be updated.') def update_raw_data(fp, fp_type, study, raw_data): From 0f8ecf72a7aa1c8db6ec54493dc2ab741992b703 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 15:36:17 -0700 Subject: [PATCH 09/12] Adding note to the CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d7504539..5f0c34062 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Version 0.2.0-dev (Changes since version 0.2.0 go here) ------------------------------------------------------- * Users can now change values and add samples and/or columns to sample and prep templates using the Update button (see the prep template and sample template tabs). +* The raw files of a RawData can be now updated using the `qiita db update_raw_data` CLI command. Version 0.2.0 (2015-08-25) -------------------------- From 8e333e523ff4f7c14f2fb48fe5befcf16196c0c7 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 15:55:37 -0700 Subject: [PATCH 10/12] Adding specific error instead of a general KeyError --- qiita_db/commands.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index 96199827e..d833e3bcd 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -353,7 +353,15 @@ def update_raw_data_from_cmd(filepaths, filepath_types, study_id, rd_id=None): raw_data = RawData(sorted(raw_data_ids)[0]) filepath_types_dict = get_filepath_types() - filepath_types = [filepath_types_dict[x] for x in filepath_types] + try: + filepath_types = [filepath_types_dict[x] for x in filepath_types] + except KeyError: + supported_types = filepath_types_dict.keys() + unsupported_types = set(filepath_types).difference(supported_types) + raise ValueError( + "Some filepath types provided are not recognized (%s). 
" + "Please choose from: %s" + % (', '.join(unsupported_types), ', '.join(supported_types))) fps = raw_data.get_filepaths() sql = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s" From da2f05f910844e8197e354535b85be90807103c6 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 16:01:05 -0700 Subject: [PATCH 11/12] Cleaning up the environment --- qiita_db/test/test_commands.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py index 0fdaa42e5..37fdada7f 100644 --- a/qiita_db/test/test_commands.py +++ b/qiita_db/test/test_commands.py @@ -31,7 +31,8 @@ from qiita_db.user import User from qiita_db.data import PreprocessedData, RawData from qiita_db.util import (get_count, check_count, get_db_files_base_dir, - get_mountpoint, compute_checksum) + get_mountpoint, compute_checksum, + get_files_from_uploads_folders) from qiita_db.metadata_template import PrepTemplate from qiita_core.util import qiita_test_checker from qiita_ware.processing_pipeline import generate_demux_file @@ -496,7 +497,15 @@ def setUp(self): f.write('\n') self._clean_up_files.append(fp) + self.uploaded_files = get_files_from_uploads_folders( + str(self.study.id)) + def tearDown(self): + new_uploaded_files = get_files_from_uploads_folders(str(self.study.id)) + new_files = set(new_uploaded_files).difference(self.uploaded_files) + path_builder = partial(join, get_mountpoint("uploads")[0][1], '1') + for _, fp in new_files: + self._clean_up_files.append(path_builder(fp)) for f in self._clean_up_files: if exists(f): remove(f) From 2a589bd060cf763a5a960f1425781ad5ec1b1019 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 8 Oct 2015 15:20:42 -0700 Subject: [PATCH 12/12] Fixing error when updating a sample template with a df with less samples than in the DB - and adding a specific test --- .../metadata_template/base_metadata_template.py | 12 ++++-------- .../metadata_template/test/test_sample_template.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index d5b61079a..f027f6aad 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -1160,19 +1160,15 @@ def update(self, md_template): % ', '.join(columns_diff)) # In order to speed up some computation, let's compare only the - # common columns. current_map.columns is a superset of - # new_map.columns, so this will not fail - current_map = current_map[new_map.columns] + # common columns and rows. current_map.columns and + # current_map.index are supersets of new_map.columns and + # new_map.index, respectivelly, so this will not fail + current_map = current_map[new_map.columns].loc[new_map.index] # Get the values that we need to change # diff_map is a DataFrame that hold boolean values. If a cell is # True, means that the new_map is different from the current_map # while False means that the cell has the same value - # In order to compare them, they've to be identically labeled, so - # we need to sort the 'index' axis to be identically labeled. 
The - # 'column' axis is already the same given the previous line of code - current_map.sort_index(axis='index', inplace=True) - new_map.sort_index(axis='index', inplace=True) diff_map = current_map != new_map # ne_stacked holds a MultiIndexed DataFrame in which the first # level of indexing is the sample_name and the second one is the diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index d49b97153..a3df6790f 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -1301,6 +1301,19 @@ def test_update(self): with self.assertRaises(QiitaDBError): st.update(self.metadata_dict_updated_column_error) + def test_update_fewer_samples(self): + """Updates using a dataframe with less samples that in the DB""" + st = SampleTemplate.create(self.metadata, self.new_study) + new_metadata = pd.DataFrame.from_dict( + {'Sample1': {'physical_specimen_location': 'CHANGE'}}, + orient='index') + exp = {s_id: st[s_id]._to_dict() for s_id in st} + s_id = '%d.Sample1' % self.new_study.id + exp[s_id]['physical_specimen_location'] = 'CHANGE' + npt.assert_warns(QiitaDBWarning, st.update, new_metadata) + obs = {s_id: st[s_id]._to_dict() for s_id in st} + self.assertEqual(obs, exp) + def test_update_numpy(self): """Update values in existing mapping file with numpy values""" metadata_dict = {