Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix #1837 #2061

Merged
merged 6 commits into from
Feb 3, 2017
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 27 additions & 15 deletions qiita_pet/handlers/api_proxy/studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from qiita_db.util import (supported_filepath_types,
get_files_from_uploads_folders)
from qiita_pet.handlers.api_proxy.util import check_access
from qiita_core.exceptions import IncompetentQiitaDeveloperError


def data_types_get_req():
Expand Down Expand Up @@ -198,7 +199,7 @@ def study_prep_get_req(study_id, user_id):
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
"""Returns the uploaded files for the study id categorized by artifact_type

It retrieves the files uploaded for the given study and tries to do a
It retrieves the files uploaded for the given study and tries to
guess how those files should be added to the artifact of the given
type. Uses information from the prep template to make a better guess.

Expand Down Expand Up @@ -234,31 +235,42 @@ def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
remaining = []

uploaded = get_files_from_uploads_folders(study_id)
pt = PrepTemplate(prep_template_id).to_dataframe()
pt = PrepTemplate(prep_template_id)

if pt.study_id != study_id:
raise IncompetentQiitaDeveloperError(
"The requested prep id (%d) doesn't belong to the study "
"(%d)" % (pt.study_id, study_id))

pt = pt.to_dataframe()
ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
if ft != 'raw_sff')
if any(ftypes_if) and 'run_prefix' in pt.columns:
prep_prefixes = tuple(set(pt['run_prefix']))
num_prefixes = len(prep_prefixes)
for _, filename in uploaded:
if filename.startswith(prep_prefixes):
selected.append(filename)
# sorting prefixes by length to avoid collisions like: 100 1002
# 10003
prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
# group files by prefix
sfiles = {p: [f for _, f in uploaded if f.startswith(p)]
for p in prep_prefixes}
inuse = [y for x in sfiles.values() for y in x]
remaining.extend([f for _, f in uploaded if f not in inuse])

for k, v in viewitems(sfiles):
len_files = len(v)
if len_files != 1 and len_files != 2:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it be possible to have a comment here about this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, added an explanation, improved the code and added a test.

remaining.extend(v)
else:
remaining.append(filename)
v.sort()
selected.append(v)
else:
num_prefixes = 0
remaining = [f for _, f in uploaded]

# At this point we can't do anything smart about selecting by default
# the files for each type. The only thing that we can do is assume that
# the first in the supp_file_types list is the default one where files
# should be added in case of 'run_prefix' being present
file_types = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]]
first = supp_file_types[0]
# Note that this works even if `run_prefix` is not in the prep template
# because selected is initialized to the empty list
file_types.insert(0, (first[0], first[1], selected))
# get file_types, format: filetype, required, list of files
file_types = [(t, req, [x[i] for x in selected if i+1 <= len(x)])
for i, (t, req) in enumerate(supp_file_types)]

# Create a list of artifacts that the user has access to, in case
# they want to import the files from another artifact
Expand Down
128 changes: 82 additions & 46 deletions qiita_pet/handlers/api_proxy/tests/test_studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@
# -----------------------------------------------------------------------------
from unittest import TestCase, main
from datetime import datetime
from os.path import exists, join, basename, isdir
from os import remove, close, mkdir
from os.path import exists, join, isdir
from os import remove
from shutil import rmtree
from tempfile import mkstemp, mkdtemp
from tempfile import mkdtemp

import pandas as pd
import numpy.testing as npt

from qiita_core.util import qiita_test_checker
from qiita_core.exceptions import IncompetentQiitaDeveloperError
import qiita_db as qdb
from qiita_pet.handlers.api_proxy.studies import (
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req,
Expand Down Expand Up @@ -237,6 +238,8 @@ def test_study_prep_get_req(self):
for i in range(4, 0, -1):
qdb.artifact.Artifact(i).visibility = "private"

qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id)

def test_study_prep_get_req_failed_EBI(self):
temp_dir = mkdtemp()
self._clean_up_files.append(temp_dir)
Expand Down Expand Up @@ -282,7 +285,9 @@ def test_study_prep_get_req_failed_EBI(self):
}
metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
dtype=str)
qdb.metadata_template.sample_template.SampleTemplate.create(
npt.assert_warns(
qdb.exceptions.QiitaDBWarning,
qdb.metadata_template.sample_template.SampleTemplate.create,
metadata, study)

# (C)
Expand Down Expand Up @@ -334,6 +339,8 @@ def test_study_prep_get_req_failed_EBI(self):
'status': 'success'}
self.assertEqual(obs, exp)

qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id)

def test_study_prep_get_req_no_access(self):
obs = study_prep_get_req(1, 'demo@microbio.me')
exp = {'status': 'error',
Expand Down Expand Up @@ -409,6 +416,7 @@ def test_study_files_get_req(self):
'Cannabis Soils (1) - Raw data 1 (1)')]}
self.assertEqual(obs, exp)

# adding a new study for further testing
info = {
"timeseries_type_id": 1,
"metadata_complete": True,
Expand All @@ -422,58 +430,86 @@ def test_study_files_get_req(self):
"principal_investigator_id": qdb.study.StudyPerson(3),
"lab_person_id": qdb.study.StudyPerson(1)
}

new_study = qdb.study.Study.create(
qdb.user.User('test@foo.bar'), "Some New Study to get files", [1],
info)

obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ')
exp = {'status': 'success',
'message': '',
'remaining': [],
'file_types': [('raw_barcodes', True, []),
('raw_forward_seqs', True, []),
('raw_reverse_seqs', False, [])],
'num_prefixes': 1,
'artifacts': [(1, 'Identification of the Microbiomes for '
'Cannabis Soils (1) - Raw data 1 (1)')]}
# check that you can't call this function using an unrelated
# study_id and prep_template_id
with self.assertRaises(IncompetentQiitaDeveloperError):
study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ')

def test_study_files_get_req_per_sample_FASTQ(self):
study_id = 1
# adding a new prep for testing
PREP = qdb.metadata_template.prep_template.PrepTemplate
prep_info_dict = {
'SKB7.640196': {'run_prefix': 'test_1'},
'SKB8.640193': {'run_prefix': 'test_2'}
}
prep_info = pd.DataFrame.from_dict(prep_info_dict,
orient='index', dtype=str)
pt = npt.assert_warns(
qdb.exceptions.QiitaDBWarning, PREP.create, prep_info,
qdb.study.Study(study_id), "Metagenomic")

# getting the upload folder so we can test
study_upload_dir = join(
qdb.util.get_mountpoint("uploads")[0][1], str(study_id))

# adding just forward per sample FASTQ files to the upload folder
filenames = ['test_1.R1.fastq.gz', 'test_2.R1.fastq.gz']
for f in filenames:
fpt = join(study_upload_dir, f)
open(fpt, 'w', 0).close()
self._clean_up_files.append(fpt)
obs = study_files_get_req(
'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
exp = {
'status': 'success', 'num_prefixes': 2, 'artifacts': [],
'remaining': ['uploaded_file.txt'], 'message': '',
'file_types': [
('raw_forward_seqs', True,
['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
('raw_reverse_seqs', False, [])]}
self.assertEqual(obs, exp)

obs = study_files_get_req('admin@foo.bar', new_study.id, 1, 'FASTQ')
exp = {'status': 'success',
'message': '',
'remaining': [],
'file_types': [('raw_barcodes', True, []),
('raw_forward_seqs', True, []),
('raw_reverse_seqs', False, [])],
'num_prefixes': 1,
'artifacts': []}
# let's add reverse
filenames = ['test_1.R2.fastq.gz', 'test_2.R2.fastq.gz']
for f in filenames:
fpt = join(study_upload_dir, f)
open(fpt, 'w', 0).close()
self._clean_up_files.append(fpt)
obs = study_files_get_req(
'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
'remaining': ['uploaded_file.txt'], 'message': '',
'file_types': [('raw_forward_seqs', True,
['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
('raw_reverse_seqs', False,
['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz'])]}
self.assertEqual(obs, exp)

# Create some 'sff' files
upload_dir = qdb.util.get_mountpoint("uploads")[0][1]
study_upload_dir = join(upload_dir, str(new_study.id))
fps = []

for i in range(2):
if not exists(study_upload_dir):
mkdir(study_upload_dir)
fd, fp = mkstemp(suffix=".sff", dir=study_upload_dir)
close(fd)
with open(fp, 'w') as f:
f.write('\n')
fps.append(fp)

self._clean_up_files.extend(fps)

obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'SFF')
exp = {'status': 'success',
# let's add an extra file that matches
filenames = ['test_1.R3.fastq.gz']
for f in filenames:
fpt = join(study_upload_dir, f)
open(fpt, 'w', 0).close()
self._clean_up_files.append(fpt)
obs = study_files_get_req(
'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
'remaining': ['test_1.R1.fastq.gz', 'test_1.R2.fastq.gz',
'test_1.R3.fastq.gz', 'uploaded_file.txt'],
'message': '',
'remaining': [basename(fpath) for fpath in sorted(fps)],
'file_types': [('raw_sff', True, [])],
'num_prefixes': 0,
'artifacts': []}
'file_types': [('raw_forward_seqs', True,
['test_2.R1.fastq.gz']),
('raw_reverse_seqs', False,
['test_2.R2.fastq.gz'])]}
self.assertEqual(obs, exp)

PREP.delete(pt.id)


if __name__ == '__main__':
main()
6 changes: 3 additions & 3 deletions qiita_pet/templates/study_ajax/add_artifact.html
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,6 @@ <h4><i>No files attached to this preparation</i></h4>

<form action="{% raw qiita_config.portal_dir %}/study/new_artifact/" method="POST" id="create-artifact-form">
<div class="row">
<div class="col-md-12">
<b>Name:</b> <input type="text" id="name" name="name" maxlength="35" required>
</div>
<div class="col-md-12">
<b>Select type:</b>
<select name="artifact-type" id="artifact-type">
Expand All @@ -102,6 +99,9 @@ <h4><i>No files attached to this preparation</i></h4>
{% end %}
</select>
</div>
<div class="col-md-12">
<b>Add a name for the file:</b> <input type="text" id="name" name="name" maxlength="35" required>
</div>
</div>
<div id="files-selector" hidden>
</div>
Expand Down
Loading