From d8c51c80e6492f9a750042346bc9b6bcf136ba55 Mon Sep 17 00:00:00 2001 From: Joshua Shorenstein Date: Fri, 20 Jun 2014 13:42:24 -0600 Subject: [PATCH 1/6] download and add data to cannabis study --- qiita_db/environment_manager.py | 44 ++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py index fa35f68fb..a6e87fdc5 100644 --- a/qiita_db/environment_manager.py +++ b/qiita_db/environment_manager.py @@ -5,12 +5,19 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- - +from tempfile import mkdtemp +from tarfile import open as taropen +from gzip import open as gzopen +from urllib import urlretrieve from os.path import abspath, dirname, join +from shutil import rmtree from functools import partial from psycopg2 import connect from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT +from qiita_db.study import Study +from qiita_db.data import PreprocessedData, ProcessedData + get_support_file = partial(join, join(dirname(abspath(__file__)), 'support_files')) @@ -165,6 +172,41 @@ def make_demo_environment(base_data_dir, base_work_dir, user, password, host): cur.close() conn.close() + # download files from thebeast + url = ("ftp://thebeast.colorado.edu/pub/QIIME_DB_Public_Studies/study_1001" + "_split_library_seqs_and_mapping.tgz") + outdir = mkdtemp() + try: + urlretrieve(url, join(outdir, "study_1001.tar.gz")) + except Exception, e: + raise("Error: DOWNLOAD FAILED", e) + rmtree(outdir) + + # untar the files + with taropen(join(outdir, "study_1001.tar.gz")) as tar: + tar.extractall(outdir) + # un-gzip sequence file + with gzopen(join(outdir, "study_1001_split_library_seqs_and_mapping/" + "study_1001_split_library_seqs.fna.gz")) as gz: + with open(join(outdir, "study_1001_split_library_seqs_and_mapping/" + "study_1001_split_library_seqs.fna"), 'w') as fout: + fout.write(gz.read()) + + # add the preprocessed and procesed data to the study + preproc = PreprocessedData.create( + Study(1), "preprocessed_sequence_illumina_params", 1, + [(join(outdir, "study_1001_split_library_seqs_and_mapping/" + "study_1001_split_library_seqs.fna"), 4)]) + + ProcessedData.create( + "processed_params_uclust", 1, + [(join(outdir, "study_1001_split_library_seqs_and_mapping/study_1001_" + "closed_reference_otu_table.biom"), 6)], + preprocessed_data=preproc) + + # clean up after ourselves + rmtree(outdir) + def drop_demo_environment(user, password, host): r"""Drops the demo environment. From d98bb87331d0b16350164dcf7268a8ad60a49de1 Mon Sep 17 00:00:00 2001 From: Joshua Shorenstein Date: Fri, 20 Jun 2014 14:16:56 -0600 Subject: [PATCH 2/6] copy demo data over test data --- qiita_db/environment_manager.py | 38 +++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py index a6e87fdc5..be4b92215 100644 --- a/qiita_db/environment_manager.py +++ b/qiita_db/environment_manager.py @@ -9,14 +9,16 @@ from tarfile import open as taropen from gzip import open as gzopen from urllib import urlretrieve +from os import remove from os.path import abspath, dirname, join -from shutil import rmtree +from shutil import rmtree, move from functools import partial from psycopg2 import connect from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT from qiita_db.study import Study from qiita_db.data import PreprocessedData, ProcessedData +from qiita_db.util import get_db_files_base_dir get_support_file = partial(join, join(dirname(abspath(__file__)), 'support_files')) @@ -176,6 +178,7 @@ def make_demo_environment(base_data_dir, base_work_dir, user, password, host): url = ("ftp://thebeast.colorado.edu/pub/QIIME_DB_Public_Studies/study_1001" "_split_library_seqs_and_mapping.tgz") outdir = mkdtemp() + basedir = join(outdir, "study_1001_split_library_seqs_and_mapping/") try: urlretrieve(url, join(outdir, "study_1001.tar.gz")) except Exception, e: @@ -186,23 +189,18 @@ def make_demo_environment(base_data_dir, base_work_dir, user, password, host): with taropen(join(outdir, "study_1001.tar.gz")) as tar: tar.extractall(outdir) # un-gzip sequence file - with gzopen(join(outdir, "study_1001_split_library_seqs_and_mapping/" - "study_1001_split_library_seqs.fna.gz")) as gz: - with open(join(outdir, "study_1001_split_library_seqs_and_mapping/" - "study_1001_split_library_seqs.fna"), 'w') as fout: + with gzopen(join(basedir, "study_1001_split_library_seqs.fna.gz")) as gz: + with open(join(basedir, "seqs.fna"), 'w') as fout: fout.write(gz.read()) - # add the preprocessed and procesed data to the study - preproc = PreprocessedData.create( - Study(1), "preprocessed_sequence_illumina_params", 1, - [(join(outdir, "study_1001_split_library_seqs_and_mapping/" - "study_1001_split_library_seqs.fna"), 4)]) - - ProcessedData.create( - "processed_params_uclust", 1, - [(join(outdir, "study_1001_split_library_seqs_and_mapping/study_1001_" - "closed_reference_otu_table.biom"), 6)], - preprocessed_data=preproc) + # copy the preprocessed and procesed data to the study + dbbase = get_db_files_base_dir() + remove(join(dbbase, "processed_data/study_1001_closed_reference_otu" + "_table.biom")) + remove(join(dbbase, "preprocessed_data/seqs.fna")) + move(join(basedir, "study_1001_closed_reference_otu_table.biom"), + join(dbbase, "processed_data")) + move(join(basedir, "seqs.fna"), join(dbbase, "preprocessed_data")) # clean up after ourselves rmtree(outdir) @@ -232,6 +230,14 @@ def drop_demo_environment(user, password, host): cur.close() conn.close() + # wipe the overwriiten test files so empty as on repo + base = get_db_files_base_dir() + with open(join(base, "preprocessed_data/seqs.fna")) as fout: + fout.write("") + with open(join(base, "processed_data/study_1001_closed_reference_otu_" + "table.biom")) as fout: + fout.write("") + def make_production_environment(): """TODO: Not implemented""" From 678a5240744a114af84047acbe83e9c730bcb4e1 Mon Sep 17 00:00:00 2001 From: Joshua Shorenstein Date: Fri, 20 Jun 2014 14:30:02 -0600 Subject: [PATCH 3/6] making drop_demo_env work --- qiita_db/environment_manager.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py index be4b92215..47fe5e37f 100644 --- a/qiita_db/environment_manager.py +++ b/qiita_db/environment_manager.py @@ -218,6 +218,7 @@ def drop_demo_environment(user, password, host): host : str The host where the postgres server is running """ + base = get_db_files_base_dir() # Connect to the postgres server conn = connect(user=user, host=host, password=password) # Set the isolation level to AUTOCOMMIT so we can execute a @@ -231,12 +232,11 @@ def drop_demo_environment(user, password, host): conn.close() # wipe the overwriiten test files so empty as on repo - base = get_db_files_base_dir() - with open(join(base, "preprocessed_data/seqs.fna")) as fout: - fout.write("") + with open(join(base, "preprocessed_data/seqs.fna"), 'w') as fout: + fout.write("\n") with open(join(base, "processed_data/study_1001_closed_reference_otu_" - "table.biom")) as fout: - fout.write("") + "table.biom"), 'w') as fout: + fout.write("\n") def make_production_environment(): From f6aa3f6507eecd1a1cfce2ee254c908dc26611cb Mon Sep 17 00:00:00 2001 From: Joshua Shorenstein Date: Fri, 20 Jun 2014 14:40:34 -0600 Subject: [PATCH 4/6] fix except --- qiita_db/environment_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py index 47fe5e37f..6af6e429c 100644 --- a/qiita_db/environment_manager.py +++ b/qiita_db/environment_manager.py @@ -181,8 +181,8 @@ def make_demo_environment(base_data_dir, base_work_dir, user, password, host): basedir = join(outdir, "study_1001_split_library_seqs_and_mapping/") try: urlretrieve(url, join(outdir, "study_1001.tar.gz")) - except Exception, e: - raise("Error: DOWNLOAD FAILED", e) + except: + raise IOError("Error: DOWNLOAD FAILED") rmtree(outdir) # untar the files From a7f2a0f2eac6805edca5f5bc99ad63cf6af7bb88 Mon Sep 17 00:00:00 2001 From: Joshua Shorenstein Date: Fri, 20 Jun 2014 14:47:34 -0600 Subject: [PATCH 5/6] fix python3 import error --- qiita_db/environment_manager.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py index 6af6e429c..fd19165df 100644 --- a/qiita_db/environment_manager.py +++ b/qiita_db/environment_manager.py @@ -8,11 +8,15 @@ from tempfile import mkdtemp from tarfile import open as taropen from gzip import open as gzopen -from urllib import urlretrieve from os import remove from os.path import abspath, dirname, join from shutil import rmtree, move from functools import partial +try: + from urllib import urlretrieve +except ImportError: # python3 + from urllib.request import urlretrieve + from psycopg2 import connect from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT From ae9737bfb2e727a503ad0abcbd728785e85985ea Mon Sep 17 00:00:00 2001 From: Joshua Shorenstein Date: Fri, 20 Jun 2014 14:48:25 -0600 Subject: [PATCH 6/6] remove unneded imports --- qiita_db/environment_manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py index fd19165df..248f02cca 100644 --- a/qiita_db/environment_manager.py +++ b/qiita_db/environment_manager.py @@ -20,8 +20,6 @@ from psycopg2 import connect from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT -from qiita_db.study import Study -from qiita_db.data import PreprocessedData, ProcessedData from qiita_db.util import get_db_files_base_dir get_support_file = partial(join, join(dirname(abspath(__file__)),