From 84199570fa78f723e03bf6275a505e75bcf0c7d4 Mon Sep 17 00:00:00 2001 From: Emily TerAvest Date: Thu, 19 Jun 2014 12:34:57 -0600 Subject: [PATCH 1/8] save before merge --- qiita_db/commands.py | 6 ++++++ scripts/qiita_db | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index 067cb7a97..d8f0817f3 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -55,3 +55,9 @@ def make_study_from_cmd(owner, title, info): efo_ids = [x.strip() for x in efo_ids.split(',')] Study.create(User(owner), title, efo_ids, infodict) + + +def import_preprossed_data(study_id, raw_data_id, filedir, filetype, + params_table, params_file): + pass + diff --git a/scripts/qiita_db b/scripts/qiita_db index 5cc25499e..db4b5ef7b 100755 --- a/scripts/qiita_db +++ b/scripts/qiita_db @@ -119,6 +119,10 @@ def drop_demo_env(user, host): """Drops the demo database environment.""" drop_demo_environment(user, None, host) + +@click.command() +@click.option('--study_id', help="The study id the raw data is " + "associated with") qiita_db.add_command(make_test_env) qiita_db.add_command(clean_test_env) qiita_db.add_command(drop_test_env) From f12dfb66684aee6562c1a861cf3b38e097f5e0cb Mon Sep 17 00:00:00 2001 From: Emily TerAvest Date: Thu, 19 Jun 2014 15:13:48 -0600 Subject: [PATCH 2/8] waiting on other pull requests --- qiita_db/commands.py | 3 +-- qiita_db/test/test_commands.py | 31 ++++++++++++++++++++++++++++++- scripts/qiita_db | 15 ++++++++++++++- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index d8f0817f3..f7e036043 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -57,7 +57,6 @@ def make_study_from_cmd(owner, title, info): Study.create(User(owner), title, efo_ids, infodict) -def import_preprossed_data(study_id, raw_data_id, filedir, filetype, +def import_preprossed_data(study_id, filedir, filetype, params_table, params_file): pass - diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py index 58b896c12..ebd64dbb5 100644 --- a/qiita_db/test/test_commands.py +++ b/qiita_db/test/test_commands.py @@ -1,5 +1,6 @@ from unittest import TestCase, main from future.utils.six import StringIO +from tempfile import mkstemp, mkdtemp try: # Python 2 from ConfigParser import NoOptionError @@ -7,7 +8,7 @@ # Python 3 from configparser import NoOptionError -from qiita_db.commands import make_study_from_cmd +from qiita_db.commands import make_study_from_cmd, import_preprocessed_data from qiita_db.study import StudyPerson from qiita_db.user import User from qiita_core.util import qiita_test_checker @@ -35,6 +36,34 @@ def test_make_study_from_cmd(self): with self.assertRaises(NoOptionError): make_study_from_cmd('test@test.com', 'newstudy2', fh2) + +@qiita_test_checker() +class TestImportPreprocessedData(TestCase): + def setUp(self): + self.tmpdir = mkdtemp() + fd, file1 = mkstemp(dir=self.tmpdir) + fd.close() + fd, file2 = mkstemp(dir=self.tmpdir) + fd.close() + fd, file3 = mkstemp(dir=self.tmpdir) + fd.close() + with open(file1, "w") as f: + f.write("\n") + with open(file2, "w") as f: + f.write("\n") + + def test_import_preprocessed_data(self): + + import_preprocessed_data(1, self.tmpdir, 1, + 'preprocessed_sequence_illumina_params', + 1, False) + sql = ("select preprocessed_data_id from qitta.preprocessed_data" + "where study_id = %s and preprocessed_params_table = %s") + study_ids = self.conn_handler.execute_fetchall( + sql, ('1', 'preprocessed_sequence_illumina_params')) + self.assertEqual(len(study_ids), 2) + + CONFIG_1 = """[required] timeseries_type_id = 1 metadata_complete = True diff --git a/scripts/qiita_db b/scripts/qiita_db index 6cabd06d9..d5f639120 100755 --- a/scripts/qiita_db +++ b/scripts/qiita_db @@ -10,7 +10,7 @@ import click -from qiita_db.commands import make_study_from_cmd +from qiita_db.commands import make_study_from_cmd, import_preprocessed_data @click.group() @@ -28,5 +28,18 @@ def insert_study_to_db(owner, title, info): make_study_from_cmd(owner, title, info) +@qiita_db.command() +@click.option('--study_id', help="Study id associated with data") +@click.option('--params_table', help="Name of the paramaters table for the" + "preprocessed dtata") +@click.option('--filedir', help="Directory containing preprocessed data") +@click.option('--filetype', help="Filepath_type_id of the files") +@click.option('--params_id', + help="id in the paramater table associated with the parameters") +@click.option('--submit_to_insdc', help="True means submitted to insdc") +def insert_preprocessed_data(study_id, filedir, filetype, params_table, + params_id, submit_to_insdc): + import_preprocessed_data(study_id, filedir, filetype, + params_table, params_id, submit_to_insdc) if __name__ == '__main__': qiita_db() From 58c184a5e2fe0b16cda91ebab2ad0da8ba0fa273 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 19 Jun 2014 19:43:53 -0600 Subject: [PATCH 3/8] Finishing up function --- qiita_db/commands.py | 13 +++++++++---- scripts/qiita_db | 10 ++++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index 3e4ea1f2d..78348d014 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -7,6 +7,8 @@ # ----------------------------------------------------------------------------- from functools import partial +from os import listdir +from os.path import join try: # Python 2 from ConfigParser import ConfigParser @@ -17,7 +19,7 @@ from .study import Study, StudyPerson from .user import User from .util import get_filetypes, get_filepath_types -from .data import RawData +from .data import RawData, PreprocessedData def make_study_from_cmd(owner, title, info): @@ -66,9 +68,12 @@ def make_study_from_cmd(owner, title, info): Study.create(User(owner), title, efo_ids, infodict) -def import_preprossed_data(study_id, filedir, filetype, - params_table, params_file): - pass +def import_preprossed_data(study_id, filedir, filetype, params_table, + params_id, submitted_to_insdc): + filepaths = [(join(filedir, fp), filetype) for fp in listdir(filedir)] + return PreprocessedData.create(Study(study_id), params_table, params_id, + filepaths, + submitted_to_insdc=submitted_to_insdc) def load_raw_data_cmd(filepaths, filepath_types, filetype, study_ids): diff --git a/scripts/qiita_db b/scripts/qiita_db index be92529f5..a94e26567 100755 --- a/scripts/qiita_db +++ b/scripts/qiita_db @@ -57,11 +57,13 @@ def insert_study_to_db(owner, title, info): @click.option('--filetype', help="Filepath_type_id of the files") @click.option('--params_id', help="id in the paramater table associated with the parameters") -@click.option('--submit_to_insdc', help="True means submitted to insdc") +@click.option('--submitted_to_insdc', is_flag=True, + help="If provided, the preprocessed data have been submitted" + " to insdc") def insert_preprocessed_data(study_id, filedir, filetype, params_table, - params_id, submit_to_insdc): - import_preprocessed_data(study_id, filedir, filetype, - params_table, params_id, submit_to_insdc) + params_id, submitted_to_insdc): + import_preprocessed_data(study_id, filedir, filetype, params_table, + params_id, submitted_to_insdc) if __name__ == '__main__': From 644980918f6624e02baa35e6757faf2a1e5250fd Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 19 Jun 2014 23:01:08 -0600 Subject: [PATCH 4/8] Adding tests --- qiita_db/test/test_commands.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py index 4ba914b28..f61b0444e 100644 --- a/qiita_db/test/test_commands.py +++ b/qiita_db/test/test_commands.py @@ -74,18 +74,18 @@ def tearDown(self): remove(fp) for dp in self.dirs_to_remove: if exists(dp): - remove(dp) + rmtree(dp) def test_import_preprocessed_data(self): - - import_preprocessed_data(1, self.tmpdir, 1, - 'preprocessed_sequence_illumina_params', - 1, False) - sql = ("select preprocessed_data_id from qitta.preprocessed_data" - "where study_id = %s and preprocessed_params_table = %s") - ppd_ids = self.conn_handler.execute_fetchall( - sql, ('1', 'preprocessed_sequence_illumina_params')) - self.assertEqual(len(ppd_ids), 2) + initial_ppd_count = get_count('qiita.preprocessed_data') + initial_fp_count = get_count('qiita.filepath') + ppd = import_preprocessed_data(1, self.tmpdir, 1, + 'preprocessed_sequence_illumina_params', + 1, False) + self.assertEqual(ppd.id, 3) + self.assertTrue(check_count('qiita.preprocessed_data', + initial_ppd_count + 1)) + self.assertTrue(check_count('qiita.filepath', initial_fp_count+2)) @qiita_test_checker() From 40ea6a51050f6b5d95c134117c16bf06fecca9bf Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 19 Jun 2014 23:11:50 -0600 Subject: [PATCH 5/8] Cleaning up environment --- qiita_db/test/test_commands.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py index f61b0444e..25feca35e 100644 --- a/qiita_db/test/test_commands.py +++ b/qiita_db/test/test_commands.py @@ -54,19 +54,20 @@ def test_make_study_from_cmd(self): class TestImportPreprocessedData(TestCase): def setUp(self): self.tmpdir = mkdtemp() - fd, file1 = mkstemp(dir=self.tmpdir) + fd, self.file1 = mkstemp(dir=self.tmpdir) close(fd) - fd, file2 = mkstemp(dir=self.tmpdir) + fd, self.file2 = mkstemp(dir=self.tmpdir) close(fd) - with open(file1, "w") as f: + with open(self.file1, "w") as f: f.write("\n") - with open(file2, "w") as f: + with open(self.file2, "w") as f: f.write("\n") - self.files_to_remove = [file1, file2] + self.files_to_remove = [self.file1, self.file2] self.dirs_to_remove = [self.tmpdir] - self.db_test_raw_dir = join(get_db_files_base_dir(), 'raw_data') + self.db_test_ppd_dir = join(get_db_files_base_dir(), + 'preprocessed_data') def tearDown(self): for fp in self.files_to_remove: @@ -82,6 +83,12 @@ def test_import_preprocessed_data(self): ppd = import_preprocessed_data(1, self.tmpdir, 1, 'preprocessed_sequence_illumina_params', 1, False) + self.files_to_remove.append( + join(self.db_test_ppd_dir, + '%d_%s' % (ppd.id, basename(self.file1)))) + self.files_to_remove.append( + join(self.db_test_ppd_dir, + '%d_%s' % (ppd.id, basename(self.file2)))) self.assertEqual(ppd.id, 3) self.assertTrue(check_count('qiita.preprocessed_data', initial_ppd_count + 1)) From 17181c15d734ce1451558e30772fd9c3a1a3be49 Mon Sep 17 00:00:00 2001 From: Emily TerAvest Date: Fri, 20 Jun 2014 10:50:42 -0600 Subject: [PATCH 6/8] changes before merge --- qiita_db/util.py | 10 ++++++++++ scripts/qiita_db | 38 +++++++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/qiita_db/util.py b/qiita_db/util.py index ccee908ec..659a580fe 100644 --- a/qiita_db/util.py +++ b/qiita_db/util.py @@ -467,3 +467,13 @@ def check_count(table, exp_count): """ obs_count = get_count(table) return obs_count == exp_count + + +def get_preprocessed_params_tables(): + """returns a list of preprocessed parmaeter tables + + Returns + ------- + list or str + """ + sql = ("SELECT * FROM information_schema" diff --git a/scripts/qiita_db b/scripts/qiita_db index 228958729..ad0324a3a 100755 --- a/scripts/qiita_db +++ b/scripts/qiita_db @@ -10,9 +10,11 @@ import click -from qiita_db.util import get_filetypes, get_filepath_types -from qiita_db.commands import (sample_template_adder, make_study_from_cmd, - load_raw_data_cmd, import_preprocessed_data) +from qiita_db.util import (get_filetypes, get_filepath_types, + get_preprocessed_params_tables) +from qiita_db.commands import (sample_template_adder, load_study_from_cmd, + load_raw_data_cmd, + load_preprocessed_data_from_cmd) @click.group() @@ -46,24 +48,30 @@ def load_raw_data(fp, fp_type, filetype, study): help="filepath of file with study information in python" "config file format") def insert_study_to_db(owner, title, info): - make_study_from_cmd(owner, title, info) + load_study_from_cmd(owner, title, info) @qiita_db.command() -@click.option('--study_id', help="Study id associated with data") -@click.option('--params_table', help="Name of the paramaters table for the" - "preprocessed data") -@click.option('--filedir', help="Directory containing preprocessed data") -@click.option('--filetype', help="Filepath_type_id of the files") -@click.option('--params_id', +@click.option('--study_id', help="Study id associated with data", + required=True) +@click.option('--params_table', help="Name of the paramaters table for the " + "preprocessed data", required=True, + type=click.Choice(get_preprocessed_params_tables())) +@click.option('--filedir', help="Directory containing preprocessed data", + required=True) +@click.option('--filepathtype', help="Describes the contents of the input " + "files", required=True, + type=click.Choice(get_filepath_types().keys())) +@click.option('--params_id', required=True, help="id in the paramater table associated with the parameters") @click.option('--submitted_to_insdc', is_flag=True, help="If provided, the preprocessed data have been submitted" - " to insdc") -def insert_preprocessed_data(study_id, filedir, filetype, params_table, - params_id, submitted_to_insdc): - import_preprocessed_data(study_id, filedir, filetype, params_table, - params_id, submitted_to_insdc) + " to insdc", required=True) +def insert_preprocessed_data(study_id, filedir, filepathtype, + params_table_name, params_id, submitted_to_insdc): + load_preprocessed_data_from_cmd(study_id, filedir, filepathtype, + params_table_name, + params_id, submitted_to_insdc) @qiita_db.command() From 39238c54311fc5c2583ebbf698681ba3f652269f Mon Sep 17 00:00:00 2001 From: Emily TerAvest Date: Fri, 20 Jun 2014 10:52:27 -0600 Subject: [PATCH 7/8] changes before merge2 --- qiita_db/commands.py | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index 7dfabf363..84c61cbbd 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -24,7 +24,19 @@ from .metadata_template import SampleTemplate -def make_study_from_cmd(owner, title, info): +def load_study_from_cmd(owner, title, info): + r"""Adds a study to the database + + Parameters + ---------- + owner : str + The email address of the owner of the study_abstract + title : str + The title of the study_abstract + info : file-like object + File-like object containing study information + + """ # Parse the configuration file config = ConfigParser() config.readfp(info) @@ -70,9 +82,28 @@ def make_study_from_cmd(owner, title, info): Study.create(User(owner), title, efo_ids, infodict) -def import_preprocessed_data(study_id, filedir, filetype, params_table, - params_id, submitted_to_insdc): - filepaths = [(join(filedir, fp), filetype) for fp in listdir(filedir)] +def load_preprocessed_data_from_cmd(study_id, filedir, filepathtype, + params_table, params_id, + submitted_to_insdc): + r"""Adds preprocessed data to the database + + Parameters + ---------- + study_id : int + The study id to which the preprocessed data belongs + filedir : str + Directory path of the preprocessed data + filepathtype: int + The filepath_type_id of the preprecessed data + params_table_name : str + The name of the table which contains the parameters of the + preprocessing + params_id : int + The id of parameters int the params_table + submitted_to_insdc : bool + Has the data been submitted to insdc + """ + filepaths = [(join(filedir, fp), filepathtype) for fp in listdir(filedir)] return PreprocessedData.create(Study(study_id), params_table, params_id, filepaths, submitted_to_insdc=submitted_to_insdc) @@ -86,7 +117,7 @@ def sample_template_adder(sample_temp_path, study_id): sample_temp_path : str Path to the sample template file study_id : int - The study id to wich the sample template belongs to + The study id to which the sample template belongs to """ sample_temp = pd.DataFrame.from_csv(sample_temp_path, sep='\t', infer_datetime_format=True) From 6c572fc526c0ad733188aef5d8609da2e81c3342 Mon Sep 17 00:00:00 2001 From: Emily TerAvest Date: Fri, 20 Jun 2014 11:36:56 -0600 Subject: [PATCH 8/8] should be good --- qiita_db/commands.py | 8 +++++--- qiita_db/test/test_commands.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index ff2e6574d..e569b14e6 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -94,8 +94,8 @@ def load_preprocessed_data_from_cmd(study_id, filedir, filepathtype, The study id to which the preprocessed data belongs filedir : str Directory path of the preprocessed data - filepathtype: int - The filepath_type_id of the preprecessed data + filepathtype: str + The filepath_type of the preprecessed data params_table_name : str The name of the table which contains the parameters of the preprocessing @@ -104,7 +104,9 @@ def load_preprocessed_data_from_cmd(study_id, filedir, filepathtype, submitted_to_insdc : bool Has the data been submitted to insdc """ - filepaths = [(join(filedir, fp), filepathtype) for fp in listdir(filedir)] + fp_types_dict = get_filepath_types() + fp_type = fp_types_dict[filepathtype] + filepaths = [(join(filedir, fp), fp_type) for fp in listdir(filedir)] return PreprocessedData.create(Study(study_id), params_table, params_id, filepaths, submitted_to_insdc=submitted_to_insdc) diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py index 8864a8658..ba222963c 100644 --- a/qiita_db/test/test_commands.py +++ b/qiita_db/test/test_commands.py @@ -82,7 +82,7 @@ def test_import_preprocessed_data(self): initial_ppd_count = get_count('qiita.preprocessed_data') initial_fp_count = get_count('qiita.filepath') ppd = load_preprocessed_data_from_cmd( - 1, self.tmpdir, 1, 'preprocessed_sequence_illumina_params', + 1, self.tmpdir, 'tar', 'preprocessed_sequence_illumina_params', 1, False) self.files_to_remove.append( join(self.db_test_ppd_dir,