Merge 97e96cc into ab4644c

antgonza · web-flow · commit e886a8db8077 · 2025-11-21T16:13:12.000Z
diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
@@ -523,19 +523,27 @@ def get(self):
         valid_data = ['raw', 'biom'] + templates
 
         to_download = []
+        # for this block to work we need 3 main inputs: prep_id/sample_id, data
+        # and data_type - if one is missing raise an error, if both
+        # prep_id/sample_id are defined or data_type doesn't exist in qiita
+        # we should error
         if data is None or (study_id is None and prep_id is None) or \
                 data not in valid_data:
             raise HTTPError(422, reason='You need to specify both data (the '
                             'data type you want to download - %s) and '
                             'study_id or prep_id' % '/'.join(valid_data))
-        elif data_type is not None and data_type not in dtypes:
+        if data_type is not None and data_type not in dtypes:
             raise HTTPError(422, reason='Not a valid data_type. Valid types '
                             'are: %s' % ', '.join(dtypes))
-        elif data in templates and prep_id is None and study_id is None:
+        if data in templates and prep_id is None and study_id is None:
             raise HTTPError(422, reason='If downloading a sample or '
                             'preparation file you need to define study_id or'
                             ' prep_id')
-        elif data in templates:
+
+        # if we get here, then we have two main options: templates or raw/biom;
+        # however, for raw/biom we need to retrieve the data via the study_id
+        # or the prep_id so splitting the next block in study_id/pre_id
+        if data in templates:
             if data_type is not None:
                 raise HTTPError(422, reason='If requesting an information '
                                 'file you cannot specify the data_type')
@@ -572,47 +580,71 @@ def get(self):
                 fname, datetime.now().strftime('%m%d%y-%H%M%S'))
             self._set_nginx_headers(zip_fn)
         else:
-            study_id = int(study_id)
-            try:
-                study = Study(study_id)
-            except QiitaDBUnknownIDError:
-                raise HTTPError(422, reason='Study does not exist')
+            # depending on if we have a study_id or a prep_id, instantiate
+            # the study for basic permission validations
+            if study_id is not None:
+                study_id = int(study_id)
+                try:
+                    study = Study(study_id)
+                except QiitaDBUnknownIDError:
+                    raise HTTPError(422, reason='Study does not exist')
+                zip_fn = 'study_%d_%s_%s.zip' % (
+                    study_id, data, datetime.now().strftime(
+                        '%m%d%y-%H%M%S'))
             else:
-                public_raw_download = study.public_raw_download
-                if study.status != 'public':
-                    raise HTTPError(404, reason='Study is not public. If this '
-                                    'is a mistake contact: %s' %
-                                    qiita_config.help_email)
-                elif data == 'raw' and not public_raw_download:
+                prep_id = int(prep_id)
+                try:
+                    prep = PrepTemplate(prep_id)
+                except QiitaDBUnknownIDError:
+                    raise HTTPError(422, reason='Prep does not exist')
+                study = Study(prep.study_id)
+                zip_fn = 'prep_%d_%s_%s.zip' % (
+                        prep_id, data, datetime.now().strftime(
+                            '%m%d%y-%H%M%S'))
+
+            public_raw_download = study.public_raw_download
+            # just to be 100% that the data is public, let's start
+            # with checking that the study is actually public
+            if study.status != 'public':
+                raise HTTPError(404, reason='Study is not public. If this '
+                                'is a mistake contact: %s' %
+                                qiita_config.help_email)
+            # now let's check that if the data is raw, the study's
+            # public_raw_download flag is on
+            if data == 'raw':
+                if not public_raw_download:
                     raise HTTPError(422, reason='No raw data access. If this '
                                     'is a mistake contact: %s'
                                     % qiita_config.help_email)
-                else:
-                    # raw data
+                if study_id is not None:
                     artifacts = [a for a in study.artifacts(dtype=data_type)
                                  if not a.parents]
-                    # bioms
-                    if data == 'biom':
-                        artifacts = study.artifacts(
-                            dtype=data_type, artifact_type='BIOM')
-                    for a in artifacts:
-                        if a.visibility != 'public' or a.has_human:
-                            continue
-                        to_download.extend(self._list_artifact_files_nginx(a))
-
-                if not to_download:
-                    raise HTTPError(422, reason='Nothing to download. If '
-                                    'this is a mistake contact: %s'
-                                    % qiita_config.help_email)
                 else:
-                    self._write_nginx_file_list(to_download)
-
-                    zip_fn = 'study_%d_%s_%s.zip' % (
-                        study_id, data, datetime.now().strftime(
-                            '%m%d%y-%H%M%S'))
+                    artifacts = [prep.artifact]
+            else:  # this is biom
+                if study_id is not None:
+                    artifacts = study.artifacts(
+                        dtype=data_type, artifact_type='BIOM')
+                else:
+                    artifacts = [a for a in
+                                 prep.artifact.descendants.nodes()
+                                 if a.artifact_type == 'BIOM']
+
+            # at this point artifacts has all the available artifact
+            # so we need to make sure they are public and have no has_human
+            # to be added to_download
+            for a in artifacts:
+                if a.visibility != 'public' or a.has_human:
+                    continue
+                to_download.extend(self._list_artifact_files_nginx(a))
 
-                    self._set_nginx_headers(zip_fn)
+            if not to_download:
+                raise HTTPError(422, reason='Nothing to download. If '
+                                'this is a mistake contact: %s'
+                                % qiita_config.help_email)
 
+        self._write_nginx_file_list(to_download)
+        self._set_nginx_headers(zip_fn)
         self.finish()
 
 
diff --git a/qiita_pet/test/test_download.py b/qiita_pet/test/test_download.py
@@ -535,6 +535,26 @@ def test_download(self):
             'mapping_files/5_mapping_file.txt')
         self.assertRegex(response.body.decode('ascii'), exp)
 
+        # Now let's check download prep with no raw data access
+        response = self.get('/public_download/?data=raw&prep_id=1')
+        self.assertTrue(response.reason.startswith('No raw data access.'))
+
+        # Now success
+        Study(1).public_raw_download = True
+        response = self.get('/public_download/?data=raw&prep_id=1')
+        self.assertEqual(response.code, 200)
+        exp = ('- [0-9]* /protected/raw_data/1_s_G1_L001_sequences.fastq.gz '
+               'raw_data/1_s_G1_L001_sequences.fastq.gz\n- [0-9]* /protected'
+               '/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz raw_data/'
+               '1_s_G1_L001_sequences_barcodes.fastq.gz\n- [0-9]* /protected/'
+               'templates/1_prep_1_qiime_19700101-000000.txt mapping_files/'
+               '1_mapping_file.txt\n')
+        self.assertRegex(response.body.decode('ascii'), exp)
+
+        # for simplicity, let's just check respose.code
+        response = self.get('/public_download/?data=biom&prep_id=1')
+        self.assertEqual(response.code, 200)
+
     def test_download_sample_information(self):
         response = self.get('/public_artifact_download/')
         self.assertEqual(response.code, 422)