diff --git a/qiita_db/test/test_meta_util.py b/qiita_db/test/test_meta_util.py index c7fd8cf71..7fff5cdf6 100644 --- a/qiita_db/test/test_meta_util.py +++ b/qiita_db/test/test_meta_util.py @@ -98,7 +98,7 @@ def test_validate_filepath_access_by_user(self): self.assertTrue(qdb.meta_util.validate_filepath_access_by_user( admin, i[0])) - # returning to origina sharing + # returning to original sharing qdb.study.Study(1).share(user) qdb.analysis.Analysis(1).share(user) qdb.study.Study.delete(study.id) diff --git a/qiita_db/util.py b/qiita_db/util.py index cbee92485..07fbc9167 100644 --- a/qiita_db/util.py +++ b/qiita_db/util.py @@ -896,12 +896,12 @@ def filepath_id_to_rel_path(filepath_id): LEFT JOIN qiita.artifact_filepath USING (filepath_id) WHERE filepath_id = %s""" qdb.sql_connection.TRN.add(sql, [filepath_id]) + # It should be only one row mp, fp, sd, a_id = qdb.sql_connection.TRN.execute_fetchindex()[0] if sd: result = join(mp, str(a_id), fp) else: result = join(mp, fp) - # It should be only one row return result diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index bbf10699f..912471754 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -1,10 +1,13 @@ -from tornado.web import authenticated +from tornado.web import authenticated, HTTPError -from os.path import basename +from os.path import basename, getsize, join +from os import walk +from datetime import datetime from .base_handlers import BaseHandler -from qiita_pet.exceptions import QiitaPetAuthorizationError -from qiita_db.util import filepath_id_to_rel_path +from qiita_pet.handlers.api_proxy import study_get_req +from qiita_db.study import Study +from qiita_db.util import filepath_id_to_rel_path, get_db_files_base_dir from qiita_db.meta_util import validate_filepath_access_by_user from qiita_core.util import execute_as_transaction @@ -16,8 +19,9 @@ def get(self, filepath_id): fid = int(filepath_id) if not validate_filepath_access_by_user(self.current_user, fid): - raise QiitaPetAuthorizationError( - self.current_user, 'filepath id %s' % str(fid)) + raise HTTPError( + 404, "%s doesn't have access to " + "filepath_id: %s" % (self.current_user.email, str(fid))) relpath = filepath_id_to_rel_path(fid) fname = basename(relpath) @@ -37,3 +41,85 @@ def get(self, filepath_id): 'attachment; filename=%s' % fname) self.finish() + + +class DownloadStudyBIOMSHandler(BaseHandler): + @authenticated + @execute_as_transaction + def get(self, study_id): + study_id = int(study_id) + # Check access to study + study_info = study_get_req(study_id, self.current_user.id) + + if study_info['status'] != 'success': + raise HTTPError(405, "%s: %s, %s" % (study_info['message'], + self.current_user.email, + str(study_id))) + + study = Study(study_id) + user = self.current_user + basedir = get_db_files_base_dir() + basedir_len = len(basedir) + 1 + # loop over artifacts and retrieve those that we have access to + to_download = [] + vfabu = validate_filepath_access_by_user + for a in study.artifacts(): + if a.artifact_type == 'BIOM': + to_add = True + for i, (fid, path, data_type) in enumerate(a.filepaths): + # validate access only of the first artifact filepath, + # the rest have the same permissions + if (i == 0 and not vfabu(user, fid)): + to_add = False + break + # ignore if tgz as they could create problems and the + # raw data is in the folder + if data_type == 'tgz': + continue + if data_type == 'directory': + # If we have a directory, we actually need to list + # all the files from the directory so NGINX can + # actually download all of them + for dp, _, fps in walk(path): + for fname in fps: + fullpath = join(dp, fname) + spath = fullpath + if fullpath.startswith(basedir): + spath = fullpath[basedir_len:] + to_download.append((fullpath, spath, spath)) + elif path.startswith(basedir): + spath = path[basedir_len:] + to_download.append((path, spath, spath)) + else: + # We are not aware of any case that can trigger this + # situation, but we wanted to be overly cautious + # There is no test for this line cause we don't know + # how to trigger it + to_download.append((path, path, path)) + + if to_add: + for pt in a.prep_templates: + qmf = pt.qiime_map_fp + if qmf is not None: + sqmf = qmf + if qmf.startswith(basedir): + sqmf = qmf[basedir_len:] + to_download.append( + (qmf, sqmf, 'mapping_files/%s_mapping_file.txt' + % a.id)) + + # If we don't have nginx, write a file that indicates this + all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n) + for fp, sfp, n in to_download]) + self.write("%s\n" % all_files) + + zip_fn = 'study_%d_%s.zip' % ( + study_id, datetime.now().strftime('%m%d%y-%H%M%S')) + + self.set_header('Content-Description', 'File Transfer') + self.set_header('Expires', '0') + self.set_header('Cache-Control', 'no-cache') + self.set_header('X-Archive-Files', 'zip') + self.set_header('Content-Disposition', + 'attachment; filename=%s' % zip_fn) + self.finish() diff --git a/qiita_pet/nginx_example.conf b/qiita_pet/nginx_example.conf index f0cca6f81..27dd4d0f5 100644 --- a/qiita_pet/nginx_example.conf +++ b/qiita_pet/nginx_example.conf @@ -18,7 +18,7 @@ http { internal; # CHANGE ME: This should match the BASE_DATA_DIR in your qiita - # config. E.g., + # config. E.g., # alias /Users/username/qiita/qiita_db/support_files/test_data/; alias ; } @@ -30,6 +30,7 @@ http { proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header Accept-Encoding identity; } } } diff --git a/qiita_pet/templates/study_base.html b/qiita_pet/templates/study_base.html index 3e16e9fa9..34b604bf4 100644 --- a/qiita_pet/templates/study_base.html +++ b/qiita_pet/templates/study_base.html @@ -239,6 +239,7 @@ Upload Files {% end %} + All QIIME maps and BIOMs
diff --git a/qiita_pet/test/test_download.py b/qiita_pet/test/test_download.py new file mode 100644 index 000000000..5fbc5a4d8 --- /dev/null +++ b/qiita_pet/test/test_download.py @@ -0,0 +1,142 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- + +from unittest import main +from mock import Mock +from os.path import exists, isdir, join +from os import remove, makedirs +from shutil import rmtree +from tempfile import mkdtemp + +from biom.util import biom_open +from biom import example_table as et + +from qiita_pet.test.tornado_test_base import TestHandlerBase +from qiita_pet.handlers.base_handlers import BaseHandler +from qiita_db.user import User +from qiita_db.artifact import Artifact +from qiita_db.software import Parameters, Command + + +class TestDownloadHandler(TestHandlerBase): + + def setUp(self): + super(TestDownloadHandler, self).setUp() + + def tearDown(self): + super(TestDownloadHandler, self).tearDown() + + def test_download(self): + # check success + response = self.get('/download/1') + self.assertEqual(response.code, 200) + self.assertEqual(response.body, ( + "This installation of Qiita was not equipped with nginx, so it " + "is incapable of serving files. The file you attempted to " + "download is located at raw_data/1_s_G1_L001_sequences.fastq.gz")) + + # failure + response = self.get('/download/1000') + self.assertEqual(response.code, 404) + + +class TestDownloadStudyBIOMSHandler(TestHandlerBase): + + def setUp(self): + super(TestDownloadStudyBIOMSHandler, self).setUp() + self._clean_up_files = [] + + def tearDown(self): + super(TestDownloadStudyBIOMSHandler, self).tearDown() + for fp in self._clean_up_files: + if exists(fp): + if isdir(fp): + rmtree(fp) + else: + remove(fp) + + def test_download_study(self): + tmp_dir = mkdtemp() + self._clean_up_files.append(tmp_dir) + + biom_fp = join(tmp_dir, 'otu_table.biom') + smr_dir = join(tmp_dir, 'sortmerna_picked_otus') + log_dir = join(smr_dir, 'seqs_otus.log') + tgz = join(tmp_dir, 'sortmerna_picked_otus.tgz') + + with biom_open(biom_fp, 'w') as f: + et.to_hdf5(f, "test") + makedirs(smr_dir) + with open(log_dir, 'w') as f: + f.write('\n') + with open(tgz, 'w') as f: + f.write('\n') + + self._clean_up_files.append(tmp_dir) + + files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')] + + params = Parameters.from_default_params( + Command(3).default_parameter_sets.next(), {'input_data': 1}) + a = Artifact.create(files_biom, "BIOM", parents=[Artifact(2)], + processing_parameters=params) + for _, fp, _ in a.filepaths: + self._clean_up_files.append(fp) + + response = self.get('/download_study_bioms/1') + self.assertEqual(response.code, 200) + exp = ( + '- 1256812 /protected/processed_data/1_study_1001_closed_' + 'reference_otu_table.biom processed_data/1_study_1001_closed_' + 'reference_otu_table.biom\n' + '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-' + '[0-9]*.txt mapping_files/4_mapping_file.txt\n' + '- 1256812 /protected/processed_data/' + '1_study_1001_closed_reference_otu_table.biom processed_data/' + '1_study_1001_closed_reference_otu_table.biom\n' + '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-' + '[0-9]*.txt mapping_files/5_mapping_file.txt\n' + '- 1256812 /protected/processed_data/' + '1_study_1001_closed_reference_otu_table_Silva.biom processed_data' + '/1_study_1001_closed_reference_otu_table_Silva.biom\n' + '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-' + '[0-9]*.txt mapping_files/6_mapping_file.txt\n' + '- 36615 /protected/templates/1_prep_2_qiime_[0-9]*-' + '[0-9]*.txt mapping_files/7_mapping_file.txt\n' + '- 39752 /protected/BIOM/{0}/otu_table.biom ' + 'BIOM/{0}/otu_table.biom\n' + '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log ' + 'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n' + '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.' + 'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id)) + self.assertRegexpMatches(response.body, exp) + + response = self.get('/download_study_bioms/200') + self.assertEqual(response.code, 405) + + # changing user so we can test the failures + BaseHandler.get_current_user = Mock( + return_value=User("demo@microbio.me")) + response = self.get('/download_study_bioms/1') + self.assertEqual(response.code, 405) + + a.visibility = 'public' + response = self.get('/download_study_bioms/1') + self.assertEqual(response.code, 200) + exp = ( + '- 39752 /protected/BIOM/{0}/otu_table.biom ' + 'BIOM/{0}/otu_table.biom\n' + '- 1 /protected/BIOM/{0}/sortmerna_picked_otus/seqs_otus.log ' + 'BIOM/{0}/sortmerna_picked_otus/seqs_otus.log\n' + '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.' + 'txt mapping_files/{0}_mapping_file.txt\n'.format(a.id)) + self.assertRegexpMatches(response.body, exp) + + +if __name__ == '__main__': + main() diff --git a/qiita_pet/webserver.py b/qiita_pet/webserver.py index 42952d478..15185601d 100644 --- a/qiita_pet/webserver.py +++ b/qiita_pet/webserver.py @@ -38,7 +38,8 @@ from qiita_pet.handlers.logger_handlers import LogEntryViewerHandler from qiita_pet.handlers.upload import UploadFileHandler, StudyUploadFileHandler from qiita_pet.handlers.stats import StatsHandler -from qiita_pet.handlers.download import DownloadHandler +from qiita_pet.handlers.download import ( + DownloadHandler, DownloadStudyBIOMSHandler) from qiita_pet.handlers.prep_template import PrepTemplateHandler from qiita_pet.handlers.ontology import OntologyHandler from qiita_db.handlers.processing_job import ( @@ -144,6 +145,7 @@ def __init__(self): (r"/check_study/", CreateStudyAJAX), (r"/stats/", StatsHandler), (r"/download/(.*)", DownloadHandler), + (r"/download_study_bioms/(.*)", DownloadStudyBIOMSHandler), (r"/vamps/(.*)", VAMPSHandler), # Plugin handlers - the order matters here so do not change # qiita_db/jobs/(.*) should go after any of the