Skip to content

Commit 6619434

Browse files
committed
upstream master
2 parents 368722e + 2d71d7d commit 6619434

File tree

11 files changed

+123
-51
lines changed

11 files changed

+123
-51
lines changed

qiita_db/analysis.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -496,14 +496,12 @@ def jobs(self):
496496
Returns
497497
-------
498498
list of ints
499-
Job ids for jobs in analysis
499+
Job ids for jobs in analysis. Empty list if no jobs attached.
500500
"""
501501
conn_handler = SQLConnectionHandler()
502502
sql = ("SELECT job_id FROM qiita.analysis_job WHERE "
503503
"analysis_id = %s".format(self._table))
504504
job_ids = conn_handler.execute_fetchall(sql, (self._id, ))
505-
if job_ids == []:
506-
return None
507505
return [job_id[0] for job_id in job_ids]
508506

509507
@property

qiita_db/metadata_template/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def prefix_sample_names_with_id(md_template, study_id):
113113
# Create a new column on the metadata template that includes the
114114
# metadata template indexes prefixed with the study id
115115
md_template['sample_name_with_id'] = (study_ids + '.' +
116-
md_template.index)
116+
md_template.index.values)
117117
md_template.index = md_template.sample_name_with_id
118118
del md_template['sample_name_with_id']
119119
# The original metadata template had the index column unnamed - remove
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
#SampleID BarcodeSequence LinkerPrimerSequence center_name center_project_name emp_status experiment_center experiment_design_description experiment_title illumina_technology library_construction_protocol pcr_primers platform run_center run_date run_prefix samp_size sample_center sequencing_meth study_center target_gene target_subfragment altitude anonymized_name assigned_from_geo collection_timestamp common_name country depth description_duplicate elevation env_biome env_feature has_extracted_data has_physical_specimen host_subject_id host_taxid latitude longitude ph physical_location samp_salinity sample_type season_environment taxon_id temp texture tot_nitro tot_org_carb water_content_soil Description
2-
1.SKB8.640193 AGCGCTCACATC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKB8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M7 3483 74.0894932572 65.3283470202 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.16399999999999998 Cannabis Soil Microbiome
3-
1.SKD8.640184 TGAGTGGTCTGT GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKD8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Diesel Root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:D9 3483 57.571893782 32.5563076447 6.8 ANL 7.1 ENVO:soil winter 1118232 15.0 66 sand, 16.3 silt, 17.7 clay 1.51 4.32 0.17800000000000002 Cannabis Soil Microbiome
4-
1.SKB7.640196 CGGCCTAAGTTC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKB7 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M8 3483 13.089194595 92.5274472082 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.16399999999999998 Cannabis Soil Microbiome
2+
1.SKB8.640193 AGCGCTCACATC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKB8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M7 3483 74.0894932572 65.3283470202 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Cannabis Soil Microbiome
3+
1.SKD8.640184 TGAGTGGTCTGT GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKD8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Diesel Root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:D9 3483 57.571893782 32.5563076447 6.8 ANL 7.1 ENVO:soil winter 1118232 15.0 66 sand, 16.3 silt, 17.7 clay 1.51 4.32 0.178 Cannabis Soil Microbiome
4+
1.SKB7.640196 CGGCCTAAGTTC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKB7 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M8 3483 13.089194595 92.5274472082 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Cannabis Soil Microbiome

qiita_db/test/test_analysis.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ def test_retrieve_jobs(self):
320320
def test_retrieve_jobs_none(self):
321321
new = Analysis.create(User("admin@foo.bar"), "newAnalysis",
322322
"A New Analysis", Analysis(1))
323-
self.assertEqual(new.jobs, None)
323+
self.assertEqual(new.jobs, [])
324324

325325
def test_retrieve_pmid(self):
326326
self.assertEqual(self.analysis.pmid, "121112")
@@ -422,8 +422,8 @@ def test_build_mapping_file(self):
422422
sql, ("%d_analysis_mapping.txt" % self.analysis.id,))
423423

424424
exp = [[15, '1_analysis_mapping.txt', 9, '852952723', 1, 1],
425-
[new_id, '1_analysis_mapping.txt', 9, '2349935429', 1, 1]]
426-
self.assertItemsEqual(obs, exp)
425+
[new_id, '1_analysis_mapping.txt', 9, '1606265094', 1, 1]]
426+
self.assertEqual(obs, exp)
427427

428428
sql = """SELECT * FROM qiita.analysis_filepath
429429
WHERE analysis_id=%s ORDER BY filepath_id"""

qiita_db/test/test_user.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def test_validate_email(self):
4343
valid3 = 'w00t@123.456.789.com'
4444
invalid1 = '@stuff.com'
4545
invalid2 = 'asdasdásd@things.com'
46-
invalid3 = 'asdas@com'
46+
invalid3 = '.asdas@com'
4747

4848
self.assertTrue(validate_email(valid1))
4949
self.assertTrue(validate_email(valid2))
@@ -180,9 +180,9 @@ def test_exists(self):
180180
def test_exists_notindb(self):
181181
self.assertFalse(User.exists("notexist@foo.bar"))
182182

183-
def test_exists_invaid_email(self):
183+
def test_exists_invalid_email(self):
184184
with self.assertRaises(IncorrectEmailError):
185-
User.exists("notanemail@badformat")
185+
User.exists("notanemail.@badformat")
186186

187187
def test_get_email(self):
188188
self.assertEqual(self.user.email, 'admin@foo.bar')

qiita_db/user.py

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
# The full license is in the file LICENSE, distributed with this software.
2929
# -----------------------------------------------------------------------------
3030
from __future__ import division
31-
from re import match
31+
from re import sub
3232

3333
from qiita_core.exceptions import (IncorrectEmailError, IncorrectPasswordError,
3434
IncompetentQiitaDeveloperError)
@@ -272,7 +272,15 @@ def verify_code(cls, email, code, code_type):
272272
sql = ("SELECT {1} from qiita.{0} where email"
273273
" = %s".format(cls._table, column))
274274
conn_handler = SQLConnectionHandler()
275-
db_code = conn_handler.execute_fetchone(sql, (email,))[0]
275+
db_code = conn_handler.execute_fetchone(sql, (email,))
276+
277+
# If the query didn't return anything, then there's no way the code
278+
# can match
279+
if db_code is None:
280+
return False
281+
282+
db_code = db_code[0]
283+
276284
if db_code == code and code_type == "create":
277285
# verify the user
278286
level = conn_handler.execute_fetchone(
@@ -461,10 +469,18 @@ def validate_email(email):
461469
462470
Notes
463471
-----
464-
A valid email must be of the form "string AT string" where the first string
465-
must be not empty, and consists of [a-zA-Z0-9.+]. The AT is the '@' symbol.
466-
The second string must be not empty, consist of [a-zA-Z0-9.], and is
467-
required to have at least one '.'.
472+
An email address is of the form local-part@domain-part
473+
For our purposes:
474+
475+
- No quoted strings are allowed
476+
- No unicode strings are allowed
477+
- There must be exactly one @ symbol
478+
- Neither local-part nor domain-part can be blank
479+
- The local-part cannot start or end with a dot
480+
- The local-part must be composed of the following characters:
481+
a-zA-Z0-9#_~!$&'()*+,;=:.-
482+
- The domain-part must be a valid hostname, composed of:
483+
a-zA-Z0-9.
468484
469485
Parameters
470486
----------
@@ -476,15 +492,40 @@ def validate_email(email):
476492
bool
477493
Whether or not the email is valid
478494
"""
479-
valid_chars = "a-zA-Z0-9\.\+\-"
480-
pattern = r"[%s]+@[%s]+\.[%s]+" % (valid_chars, valid_chars, valid_chars)
481-
495+
# Do not accept email addresses that have unicode characters
482496
try:
483497
email.encode('ascii')
484498
except UnicodeError:
485499
return False
486500

487-
return True if match(pattern, email) is not None else False
501+
# we are not allowing quoted strings in the email address
502+
if '"' in email:
503+
return False
504+
505+
# Must have exactly 1 @ symbol
506+
if email.count('@') != 1:
507+
return False
508+
509+
local_part, domain_part = email.split('@')
510+
511+
# Neither part can be blank
512+
if not (local_part and domain_part):
513+
return False
514+
515+
# The local part cannot begin or end with a dot
516+
if local_part.startswith('.') or local_part.endswith('.'):
517+
return False
518+
519+
# This is the full set of allowable characters for the local part.
520+
local_valid_chars = "[a-zA-Z0-9#_~!$&'()*+,;=:.-]"
521+
if len(sub(local_valid_chars, '', local_part)):
522+
return False
523+
524+
domain_valid_chars = "[a-zA-Z0-9.]"
525+
if len(sub(domain_valid_chars, '', domain_part)):
526+
return False
527+
528+
return True
488529

489530

490531
def validate_password(password):

qiita_pet/handlers/analysis_handlers.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -144,15 +144,14 @@ def get(self, analysis_id):
144144

145145
dropped_samples = analysis.dropped_samples
146146
dropped = defaultdict(list)
147-
if dropped_samples:
148-
for proc_data_id, samples in viewitems(dropped_samples):
149-
if not samples:
150-
continue
151-
proc_data = ProcessedData(proc_data_id)
152-
data_type = proc_data.data_type()
153-
study = proc_data.study
154-
dropped[data_type].append((Study(study).title, len(samples),
155-
', '.join(samples)))
147+
for proc_data_id, samples in viewitems(dropped_samples):
148+
if not samples:
149+
continue
150+
proc_data = ProcessedData(proc_data_id)
151+
data_type = proc_data.data_type()
152+
study = proc_data.study
153+
dropped[data_type].append((Study(study).title, len(samples),
154+
', '.join(samples)))
156155

157156
self.render("analysis_results.html", analysis_id=analysis_id,
158157
jobres=jobres, aname=analysis.name, dropped=dropped,

qiita_pet/templates/select_commands.html

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,17 @@
22
{% autoescape None %}
33

44
{%block head%}
5+
56
<script>
7+
function check_selection(){
8+
if($('.command:checkbox:checked').length > 0){
9+
return true;
10+
}else {
11+
$('#error').text('Please select at least one command.');
12+
return false;
13+
}
14+
}
15+
616
$(function () {
717
$('#data-types-tabs a:first').tab('show')
818
})
@@ -12,8 +22,8 @@
1222
{%block content %}
1323

1424
<h1>Select Commands</h1>
15-
16-
<form role="form" action="/analysis/wait/{{aid}}" method="post">
25+
<span id="error" style="color:red"></span>
26+
<form role="form" action="/analysis/wait/{{aid}}" method="post" onsubmit="return check_selection()">
1727
<div style="padding-bottom:25px;">
1828
Rarefaction Depth: <input type="number" min="10" id="rarefaction-depth" name="rarefaction-depth" class="form-control" style="width:10em;">
1929
</div>
@@ -35,7 +45,7 @@ <h1>Select Commands</h1>
3545
{% for command in commands[data_type] %}
3646
<tr>
3747
<td style="width:20px;">
38-
<input id="{{data_type}}#{{command.name}}" type="checkbox" name="commands" value="{{data_type}}#{{command.name}}">
48+
<input id="{{data_type}}#{{command.name}}" type="checkbox" name="commands" class="command" value="{{data_type}}#{{command.name}}">
3949
</td>
4050
<td>
4151
<label style="font-weight:normal;" for="{{data_type}}#{{command.name}}">{{command.name}}</label>

qiita_ware/analysis_pipeline.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,15 @@ def _build_analysis_files(analysis, r_depth=None, **kwargs):
2727
The analysis to build files for
2828
r_depth : int, optional
2929
Rarefaction depth for biom table creation. Default None
30+
31+
Raises
32+
------
33+
RuntimeError
34+
No jobs are attached to the given analysis
3035
"""
36+
if not analysis.jobs:
37+
raise RuntimeError("Analysis %d has no jobs attached!" % analysis.id)
38+
3139
# create the biom tables and add jobs to the analysis
3240
analysis.status = "running"
3341
analysis.build_files(r_depth)

qiita_ware/demux.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,10 @@ def to_ascii(demux, samples=None):
438438
for samp, idx, seq, qual, bc_ori, bc_cor, bc_err in fetch(demux, samples):
439439
seq_id = id_fmt % {'sample': samp, 'idx': idx, 'bc_ori': bc_ori,
440440
'bc_cor': bc_cor, 'bc_diff': bc_err}
441-
yield formatter(seq_id, seq, qual.astype(np.uint8))
441+
if qual is not None:
442+
qual = qual.astype(np.uint8)
443+
444+
yield formatter(seq_id, seq, qual)
442445

443446

444447
def to_per_sample_ascii(demux, samples=None):

0 commit comments

Comments
 (0)