upstream master

antgonza · antgonza · commit 661943463ae0 · 2015-05-17T19:21:04.000-06:00
diff --git a/qiita_db/analysis.py b/qiita_db/analysis.py
@@ -496,14 +496,12 @@ def jobs(self):
         Returns
         -------
         list of ints
-            Job ids for jobs in analysis
+            Job ids for jobs in analysis. Empty list if no jobs attached.
         """
         conn_handler = SQLConnectionHandler()
         sql = ("SELECT job_id FROM qiita.analysis_job WHERE "
                "analysis_id = %s".format(self._table))
         job_ids = conn_handler.execute_fetchall(sql, (self._id, ))
-        if job_ids == []:
-            return None
         return [job_id[0] for job_id in job_ids]
 
     @property
diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py
@@ -113,7 +113,7 @@ def prefix_sample_names_with_id(md_template, study_id):
         # Create a new column on the metadata template that includes the
         # metadata template indexes prefixed with the study id
         md_template['sample_name_with_id'] = (study_ids + '.' +
-                                              md_template.index)
+                                              md_template.index.values)
         md_template.index = md_template.sample_name_with_id
         del md_template['sample_name_with_id']
         # The original metadata template had the index column unnamed - remove
diff --git a/qiita_db/support_files/test_data/analysis/1_analysis_mapping_exp.txt b/qiita_db/support_files/test_data/analysis/1_analysis_mapping_exp.txt
@@ -1,4 +1,4 @@
 #SampleID	BarcodeSequence	LinkerPrimerSequence	center_name	center_project_name	emp_status	experiment_center	experiment_design_description	experiment_title	illumina_technology	library_construction_protocol	pcr_primers	platform	run_center	run_date	run_prefix	samp_size	sample_center	sequencing_meth	study_center	target_gene	target_subfragment	altitude	anonymized_name	assigned_from_geo	collection_timestamp	common_name	country	depth	description_duplicate	elevation	env_biome	env_feature	has_extracted_data	has_physical_specimen	host_subject_id	host_taxid	latitude	longitude	ph	physical_location	samp_salinity	sample_type	season_environment	taxon_id	temp	texture	tot_nitro	tot_org_carb	water_content_soil	Description
-1.SKB8.640193	AGCGCTCACATC	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	0.0	SKB8	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Burmese root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:M7	3483	74.0894932572	65.3283470202	6.94	ANL	7.15	ENVO:soil	winter	1118232	15.0	64.6 sand, 17.6 silt, 17.8 clay	1.41	5.0	0.16399999999999998	Cannabis Soil Microbiome
-1.SKD8.640184	TGAGTGGTCTGT	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	0.0	SKD8	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Diesel Root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:D9	3483	57.571893782	32.5563076447	6.8	ANL	7.1	ENVO:soil	winter	1118232	15.0	66 sand, 16.3 silt, 17.7 clay	1.51	4.32	0.17800000000000002	Cannabis Soil Microbiome
-1.SKB7.640196	CGGCCTAAGTTC	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	0.0	SKB7	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Burmese root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:M8	3483	13.089194595	92.5274472082	6.94	ANL	7.15	ENVO:soil	winter	1118232	15.0	64.6 sand, 17.6 silt, 17.8 clay	1.41	5.0	0.16399999999999998	Cannabis Soil Microbiome
+1.SKB8.640193	AGCGCTCACATC	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	0.0	SKB8	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Burmese root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:M7	3483	74.0894932572	65.3283470202	6.94	ANL	7.15	ENVO:soil	winter	1118232	15.0	64.6 sand, 17.6 silt, 17.8 clay	1.41	5.0	0.164	Cannabis Soil Microbiome
+1.SKD8.640184	TGAGTGGTCTGT	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	0.0	SKD8	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Diesel Root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:D9	3483	57.571893782	32.5563076447	6.8	ANL	7.1	ENVO:soil	winter	1118232	15.0	66 sand, 16.3 silt, 17.7 clay	1.51	4.32	0.178	Cannabis Soil Microbiome
+1.SKB7.640196	CGGCCTAAGTTC	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	0.0	SKB7	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Burmese root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:M8	3483	13.089194595	92.5274472082	6.94	ANL	7.15	ENVO:soil	winter	1118232	15.0	64.6 sand, 17.6 silt, 17.8 clay	1.41	5.0	0.164	Cannabis Soil Microbiome
diff --git a/qiita_db/test/test_analysis.py b/qiita_db/test/test_analysis.py
@@ -320,7 +320,7 @@ def test_retrieve_jobs(self):
     def test_retrieve_jobs_none(self):
         new = Analysis.create(User("admin@foo.bar"), "newAnalysis",
                               "A New Analysis", Analysis(1))
-        self.assertEqual(new.jobs, None)
+        self.assertEqual(new.jobs, [])
 
     def test_retrieve_pmid(self):
         self.assertEqual(self.analysis.pmid, "121112")
@@ -422,8 +422,8 @@ def test_build_mapping_file(self):
             sql, ("%d_analysis_mapping.txt" % self.analysis.id,))
 
         exp = [[15, '1_analysis_mapping.txt', 9, '852952723', 1, 1],
-               [new_id, '1_analysis_mapping.txt', 9, '2349935429', 1, 1]]
-        self.assertItemsEqual(obs, exp)
+               [new_id, '1_analysis_mapping.txt', 9, '1606265094', 1, 1]]
+        self.assertEqual(obs, exp)
 
         sql = """SELECT * FROM qiita.analysis_filepath
                  WHERE analysis_id=%s ORDER BY filepath_id"""
diff --git a/qiita_db/test/test_user.py b/qiita_db/test/test_user.py
@@ -43,7 +43,7 @@ def test_validate_email(self):
         valid3 = 'w00t@123.456.789.com'
         invalid1 = '@stuff.com'
         invalid2 = 'asdasdásd@things.com'
-        invalid3 = 'asdas@com'
+        invalid3 = '.asdas@com'
 
         self.assertTrue(validate_email(valid1))
         self.assertTrue(validate_email(valid2))
@@ -180,9 +180,9 @@ def test_exists(self):
     def test_exists_notindb(self):
         self.assertFalse(User.exists("notexist@foo.bar"))
 
-    def test_exists_invaid_email(self):
+    def test_exists_invalid_email(self):
         with self.assertRaises(IncorrectEmailError):
-            User.exists("notanemail@badformat")
+            User.exists("notanemail.@badformat")
 
     def test_get_email(self):
         self.assertEqual(self.user.email, 'admin@foo.bar')
diff --git a/qiita_db/user.py b/qiita_db/user.py
@@ -28,7 +28,7 @@
 # The full license is in the file LICENSE, distributed with this software.
 # -----------------------------------------------------------------------------
 from __future__ import division
-from re import match
+from re import sub
 
 from qiita_core.exceptions import (IncorrectEmailError, IncorrectPasswordError,
                                    IncompetentQiitaDeveloperError)
@@ -272,7 +272,15 @@ def verify_code(cls, email, code, code_type):
         sql = ("SELECT {1} from qiita.{0} where email"
                " = %s".format(cls._table, column))
         conn_handler = SQLConnectionHandler()
-        db_code = conn_handler.execute_fetchone(sql, (email,))[0]
+        db_code = conn_handler.execute_fetchone(sql, (email,))
+
+        # If the query didn't return anything, then there's no way the code
+        # can match
+        if db_code is None:
+            return False
+
+        db_code = db_code[0]
+
         if db_code == code and code_type == "create":
             # verify the user
             level = conn_handler.execute_fetchone(
@@ -461,10 +469,18 @@ def validate_email(email):
 
     Notes
     -----
-    A valid email must be of the form "string AT string" where the first string
-    must be not empty, and consists of [a-zA-Z0-9.+]. The AT is the '@' symbol.
-    The second string must be not empty, consist of [a-zA-Z0-9.], and is
-    required to have at least one '.'.
+    An email address is of the form local-part@domain-part
+    For our purposes:
+
+    - No quoted strings are allowed
+    - No unicode strings are allowed
+    - There must be exactly one @ symbol
+    - Neither local-part nor domain-part can be blank
+    - The local-part cannot start or end with a dot
+    - The local-part must be composed of the following characters:
+      a-zA-Z0-9#_~!$&'()*+,;=:.-
+    - The domain-part must be composed of the following characters
+      (only the character set is checked): a-zA-Z0-9.
 
     Parameters
     ----------
@@ -476,15 +492,40 @@ def validate_email(email):
     bool
         Whether or not the email is valid
     """
-    valid_chars = "a-zA-Z0-9\.\+\-"
-    pattern = r"[%s]+@[%s]+\.[%s]+" % (valid_chars, valid_chars, valid_chars)
-
+    # Do not accept email addresses that have unicode characters
     try:
         email.encode('ascii')
     except UnicodeError:
         return False
 
-    return True if match(pattern, email) is not None else False
+    # we are not allowing quoted strings in the email address
+    if '"' in email:
+        return False
+
+    # Must have exactly 1 @ symbol
+    if email.count('@') != 1:
+        return False
+
+    local_part, domain_part = email.split('@')
+
+    # Neither part can be blank
+    if not (local_part and domain_part):
+        return False
+
+    # The local part cannot begin or end with a dot
+    if local_part.startswith('.') or local_part.endswith('.'):
+        return False
+
+    # This is the full set of allowable characters for the local part.
+    local_valid_chars = "[a-zA-Z0-9#_~!$&'()*+,;=:.-]"
+    if len(sub(local_valid_chars, '', local_part)):
+        return False
+
+    domain_valid_chars = "[a-zA-Z0-9.]"
+    if len(sub(domain_valid_chars, '', domain_part)):
+        return False
+
+    return True
 
 
 def validate_password(password):
diff --git a/qiita_pet/handlers/analysis_handlers.py b/qiita_pet/handlers/analysis_handlers.py
@@ -144,15 +144,14 @@ def get(self, analysis_id):
 
         dropped_samples = analysis.dropped_samples
         dropped = defaultdict(list)
-        if dropped_samples:
-            for proc_data_id, samples in viewitems(dropped_samples):
-                if not samples:
-                    continue
-                proc_data = ProcessedData(proc_data_id)
-                data_type = proc_data.data_type()
-                study = proc_data.study
-                dropped[data_type].append((Study(study).title, len(samples),
-                                           ', '.join(samples)))
+        for proc_data_id, samples in viewitems(dropped_samples):
+            if not samples:
+                continue
+            proc_data = ProcessedData(proc_data_id)
+            data_type = proc_data.data_type()
+            study = proc_data.study
+            dropped[data_type].append((Study(study).title, len(samples),
+                                       ', '.join(samples)))
 
         self.render("analysis_results.html", analysis_id=analysis_id,
                     jobres=jobres, aname=analysis.name, dropped=dropped,
diff --git a/qiita_pet/templates/select_commands.html b/qiita_pet/templates/select_commands.html
@@ -2,7 +2,17 @@
 {% autoescape None %}
 
 {%block head%}
+
 <script>
+  function check_selection(){
+    if($('.command:checkbox:checked').length > 0){
+       return true;
+   }else {
+        $('#error').text('Please select at least one command.');
+       return false;
+   }
+  }
+
   $(function () {
     $('#data-types-tabs a:first').tab('show')
   })
@@ -12,8 +22,8 @@
 {%block content %}
 
 <h1>Select Commands</h1>
-
-<form role="form" action="/analysis/wait/{{aid}}" method="post">
+<span id="error" style="color:red"></span>
+<form role="form" action="/analysis/wait/{{aid}}" method="post" onsubmit="return check_selection()">
 <div style="padding-bottom:25px;">
     Rarefaction Depth: <input type="number" min="10" id="rarefaction-depth" name="rarefaction-depth" class="form-control" style="width:10em;">
 </div>
@@ -35,7 +45,7 @@ <h1>Select Commands</h1>
           {% for command in commands[data_type] %}
             <tr>
               <td style="width:20px;">
-                <input id="{{data_type}}#{{command.name}}" type="checkbox" name="commands" value="{{data_type}}#{{command.name}}">
+                <input id="{{data_type}}#{{command.name}}" type="checkbox" name="commands" class="command" value="{{data_type}}#{{command.name}}">
               </td>
               <td>
                 <label style="font-weight:normal;" for="{{data_type}}#{{command.name}}">{{command.name}}</label>
diff --git a/qiita_ware/analysis_pipeline.py b/qiita_ware/analysis_pipeline.py
@@ -27,7 +27,15 @@ def _build_analysis_files(analysis, r_depth=None, **kwargs):
         The analysis to build files for
     r_depth : int, optional
         Rarefaction depth for biom table creation. Default None
+
+    Raises
+    ------
+    RuntimeError
+        No jobs are attached to the given analysis
     """
+    if not analysis.jobs:
+        raise RuntimeError("Analysis %d has no jobs attached!" % analysis.id)
+
     # create the biom tables and add jobs to the analysis
     analysis.status = "running"
     analysis.build_files(r_depth)
diff --git a/qiita_ware/demux.py b/qiita_ware/demux.py
@@ -438,7 +438,10 @@ def to_ascii(demux, samples=None):
     for samp, idx, seq, qual, bc_ori, bc_cor, bc_err in fetch(demux, samples):
         seq_id = id_fmt % {'sample': samp, 'idx': idx, 'bc_ori': bc_ori,
                            'bc_cor': bc_cor, 'bc_diff': bc_err}
-        yield formatter(seq_id, seq, qual.astype(np.uint8))
+        if qual is not None:
+            qual = qual.astype(np.uint8)
+
+        yield formatter(seq_id, seq, qual)
 
 
 def to_per_sample_ascii(demux, samples=None):
diff --git a/qiita_ware/test/test_demux.py b/qiita_ware/test/test_demux.py