From 54c6f1498a0cba52bc0efd7ca906efbb6f8929fe Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 17 Sep 2015 11:10:31 -0700 Subject: [PATCH 01/12] Fixing minimal mapping generation --- qiita_ware/processing_pipeline.py | 38 +++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/qiita_ware/processing_pipeline.py b/qiita_ware/processing_pipeline.py index 167dbb93e..6f4caa8ec 100644 --- a/qiita_ware/processing_pipeline.py +++ b/qiita_ware/processing_pipeline.py @@ -24,8 +24,9 @@ def _get_qiime_minimal_mapping(prep_template, out_dir): """Generates a minimal QIIME-compliant mapping file for split libraries The columns of the generated file are, in order: SampleID, BarcodeSequence, - LinkerPrimerSequence, Description. All values are taken from the prep - template except for Description, which always receive the value "Qiita MMF" + LinkerPrimerSequence, [ReverseLinkerPrimer] Description. All values are + taken from the prep template except for Description, which always receive + the value "Qiita MMF" Parameters ---------- @@ -40,42 +41,45 @@ def _get_qiime_minimal_mapping(prep_template, out_dir): The paths to the qiime minimal mapping files """ from functools import partial - from collections import defaultdict from os.path import join import pandas as pd - # The prep templates has a QIIME mapping file, get it - qiime_map = pd.read_csv(prep_template.qiime_map_fp, sep='\t', - keep_default_na=False, na_values=['unknown'], - index_col=False, - converters=defaultdict(lambda: str)) - qiime_map.set_index('#SampleID', inplace=True, drop=True) + pt_df = prep_template.to_dataframe() - # We use our own description to avoid potential processing problems - qiime_map['Description'] = pd.Series(['Qiita MMF'] * len(qiime_map.index), - index=qiime_map.index) + rename_cols = { + 'barcode': 'BarcodeSequence', + 'primer': 'LinkerPrimerSequence', + } - # We ensure the order of the columns as QIIME is expecting - if 'ReverseLinkerPrimer' in qiime_map: + # Ensure the order of the columns as QIIME is expecting + if 'reverselinkerprimer' in pt_df: + rename_cols['reverselinkerprimer'] = 'ReverseLinkerPrimer' cols = ['BarcodeSequence', 'LinkerPrimerSequence', 'ReverseLinkerPrimer', 'Description'] else: cols = ['BarcodeSequence', 'LinkerPrimerSequence', 'Description'] + pt_df.rename(columns=rename_cols, inplace=True) + + # Sometimes, the Description column can generate some problems in QIIME, + # depending on its values. We set it up to read Qiita MMF for all rows + pt_df['Description'] = pd.Series(['Qiita MMF'] * len(pt_df.index), + index=pt_df.index) + path_builder = partial(join, out_dir) - if 'run_prefix' in qiime_map: + if 'run_prefix' in pt_df: # The study potentially has more than 1 lane, so we should generate a # qiime MMF for each of the lanes. 
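As an aside, the approach this patch takes to building the minimal mapping file can be illustrated with a small, self-contained pandas sketch. The DataFrame below merely stands in for prep_template.to_dataframe(); the sample ids, run_prefix values, and output file names are made up for illustration, while the renamed columns and the fixed "Qiita MMF" description mirror what the patch does.

import pandas as pd

# Stand-in for prep_template.to_dataframe(); values are kept as strings so
# id-like entries such as "1.10" are never coerced to numbers.
pt_df = pd.DataFrame(
    {'barcode': ['GTCCGCAAGTTA', 'CGTAGAGCTCTC'],
     'primer': ['GTGCCAGCMGCCGCGGTAA', 'GTGCCAGCMGCCGCGGTAA'],
     'run_prefix': ['s_G1_L001', 's_G1_L002']},          # made-up prefixes
    index=['1.SKB8.640193', '1.SKD8.640184'])

# Rename to the QIIME column names and force a constant Description
pt_df = pt_df.rename(columns={'barcode': 'BarcodeSequence',
                              'primer': 'LinkerPrimerSequence'})
pt_df['Description'] = 'Qiita MMF'
cols = ['BarcodeSequence', 'LinkerPrimerSequence', 'Description']

# One minimal mapping file per run_prefix value (i.e. per lane)
for prefix, df in pt_df.groupby('run_prefix'):
    df[cols].to_csv('%s_MMF.txt' % prefix, sep='\t', index_label='#SampleID')
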
We know how to split the prep # template based on the run_prefix column output_fps = [] - for prefix, df in qiime_map.groupby('run_prefix'): + for prefix, df in pt_df.groupby('run_prefix'): df = df[cols] out_fp = path_builder("%s_MMF.txt" % prefix) output_fps.append(out_fp) df.to_csv(out_fp, index_label="#SampleID", sep='\t') else: # The study only has one lane, just write the MMF - df = qiime_map[cols] + df = pt_df[cols] out_fp = path_builder("prep_%d_MMF.txt" % prep_template.id) output_fps = [out_fp] df.to_csv(out_fp, index_label="#SampleID", sep='\t') From 76604a4e5173362e8c39a68bbac69b0db0af14ec Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 17 Sep 2015 15:20:00 -0700 Subject: [PATCH 02/12] Addressing @mortonjt comment --- qiita_ware/processing_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiita_ware/processing_pipeline.py b/qiita_ware/processing_pipeline.py index 6f4caa8ec..e3835c4bf 100644 --- a/qiita_ware/processing_pipeline.py +++ b/qiita_ware/processing_pipeline.py @@ -30,7 +30,7 @@ def _get_qiime_minimal_mapping(prep_template, out_dir): Parameters ---------- - prep_template : PrepTemplate + prep_template : qiita_db.metadata_template.PrepTemplate The prep template from which we need to generate the minimal mapping out_dir : str Path to the output directory From eae80aa38f6b4035abd82b95dcb90f3f7e7fc99d Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 17 Sep 2015 15:20:13 -0700 Subject: [PATCH 03/12] Adding test --- qiita_ware/test/test_processing_pipeline.py | 53 +++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/qiita_ware/test/test_processing_pipeline.py b/qiita_ware/test/test_processing_pipeline.py index 2b432121a..be72745db 100644 --- a/qiita_ware/test/test_processing_pipeline.py +++ b/qiita_ware/test/test_processing_pipeline.py @@ -14,6 +14,7 @@ from functools import partial from shutil import rmtree +import numpy.testing as npt import pandas as pd import gzip @@ -23,12 +24,13 @@ from qiita_db.sql_connection import SQLConnectionHandler from qiita_db.data import RawData, PreprocessedData -from qiita_db.study import Study +from qiita_db.study import Study, StudyPerson +from qiita_db.user import User from qiita_db.parameters import (PreprocessedIlluminaParams, ProcessedSortmernaParams, Preprocessed454Params) - -from qiita_db.metadata_template import PrepTemplate +from qiita_db.exceptions import QiitaDBWarning +from qiita_db.metadata_template import PrepTemplate, SampleTemplate from qiita_ware.processing_pipeline import (_get_preprocess_fastq_cmd, _get_preprocess_fasta_cmd, _insert_preprocessed_data, @@ -192,6 +194,45 @@ def test_get_qiime_minimal_mapping_single_reverse_primer(self): with open(exp_fps[0], "U") as f: self.assertEqual(f.read(), EXP_PREP_RLP) + def test_get_qiime_minimal_mapping_numeric_sample_ids(self): + # Get minimal mapping file works correctly with numeric sample ids. 
A + # bug was found that samples of the type .[0-9]*0 where + # truncated to .[0-9]* + info = {"timeseries_type_id": 1, + "metadata_complete": True, + "mixs_compliant": True, + "number_samples_collected": 25, + "number_samples_promised": 28, + "study_alias": "testing", + "study_description": "Test description", + "study_abstract": "Test abstract", + "emp_person_id": StudyPerson(2), + "principal_investigator_id": StudyPerson(3), + "lab_person_id": StudyPerson(1) + } + new_study = Study.create(User('test@foo.bar'), "Test study", [1], info) + metadata_dict = {'1': {'host_subject_id': 'NotIdentified'}, + '10': {'host_subject_id': 'NotIdentified'}} + metadata = pd.DataFrame.from_dict(metadata_dict, orient='index') + npt.assert_warns(QiitaDBWarning, SampleTemplate.create, metadata, + new_study) + metadata_dict = {'1': {'str_column': 'Value for sample 1', + 'primer': 'GTGCCAGCMGCCGCGGTAA', + 'barcode': 'GTCCGCAAGTTA'}, + '10': {'str_column': 'Value for sample 1', + 'primer': 'GTGCCAGCMGCCGCGGTAA', + 'barcode': 'CGTAGAGCTCTC'}} + metadata = pd.DataFrame.from_dict(metadata_dict, orient='index') + pt = npt.assert_warns(QiitaDBWarning, PrepTemplate.create, metadata, + new_study, 2) + out_dir = mkdtemp() + obs_fps = _get_qiime_minimal_mapping(pt, out_dir) + exp_fps = [join(out_dir, 'prep_%s_MMF.txt' % pt.id)] + self.assertEqual(obs_fps, exp_fps) + self.assertTrue(exists(exp_fps[0])) + with open(exp_fps[0], 'U') as f: + self.assertEqual(f.read(), EXP_PREP_NUM.format(new_study.id)) + def test_get_qiime_minimal_mapping_multiple(self): # We need to create a prep template in which we have different run # prefix values, so we can test this case @@ -771,6 +812,12 @@ def test_insert_processed_data_target_gene(self): "1.SKM8.640201\tCCGATGCCTTGA\tGTGCCAGCMGCCGCGGTAA\tGTGCCAGCM\tQiita MMF\n" "1.SKM9.640192\tAGCAGGCACGAA\tGTGCCAGCMGCCGCGGTAA\tGTGCCAGCM\tQiita MMF\n") +EXP_PREP_NUM = ( + "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tDescription\n" + "{0}.1\tGTCCGCAAGTTA\tGTGCCAGCMGCCGCGGTAA\tQiita MMF\n" + "{0}.10\tCGTAGAGCTCTC\tGTGCCAGCMGCCGCGGTAA\tQiita MMF\n" +) + EXP_PREP_1 = ( "#SampleID\tBarcodeSequence\tLinkerPrimerSequence\tDescription\n" "1.SKB8.640193\tGTCCGCAAGTTA\tGTGCCAGCMGCCGCGGTAA\tQiita MMF\n" From 28eafdadd06c71f74fcf3ed3aef71c2df8d60034 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Mon, 28 Sep 2015 10:05:46 -0700 Subject: [PATCH 04/12] Add command to update the raw data --- qiita_db/commands.py | 57 +++++++++++++++++++++++++++++++++++++++++++- scripts/qiita | 7 ++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index 4004ae12d..b062cb2cf 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -307,6 +307,60 @@ def load_parameters_from_cmd(name, fp, table): return constructor.create(name, **params) +def update_raw_data_from_cmd(filepaths, filepath_types, study_id, rd_id=None): + """Updates the raw data of the study 'study_id' + + Parameters + ---------- + filepaths : iterable of str + Paths to the raw data files + filepath_types : iterable of str + Describes the contents of the files + study_id : int + The study_id of the study to be updated + rd_id : int, optional + The id of the raw data to be updated. 
If not provided, the raw data + with lowest id in the study will be updated + + Returns + ------- + qiita_db.data.RawData + + Raises + ------ + ValueError + If 'filepaths' and 'filepath_types' do not have the same length + If the study does not have any raw data + If rd_id is provided and it does not belong to the given study + """ + if len(filepaths) != len(filepath_types): + raise ValueError("Please provide exactly one filepath_type for each" + "and every filepath") + with TRN: + study = Study(study_id) + raw_data_ids = study.raw_data() + if not raw_data_ids: + raise ValueError("Study %s does not have any raw data" % study_id) + + if rd_id: + if rd_id not in raw_data_ids: + raise ValueError( + "The raw data %d does not exist in the study %d. Available" + " raw data: %s" + % (rd_id, study_id, ', '.join(map(str, raw_data_ids)))) + raw_data = RawData(rd_id) + else: + raw_data = RawData(sorted(raw_data_ids)[0]) + + filepath_types_dict = get_filepath_types() + filepath_types = [filepath_types_dict[x] for x in filepath_types] + + raw_data.clear_filepaths() + raw_data.add_filepaths(list(zip(filepaths, filepath_types))) + + return raw_data + + def update_preprocessed_data_from_cmd(sl_out_dir, study_id, ppd_id=None): """Updates the preprocessed data of the study 'study_id' @@ -351,7 +405,8 @@ def update_preprocessed_data_from_cmd(sl_out_dir, study_id, ppd_id=None): study = Study(study_id) ppds = study.preprocessed_data() if not ppds: - raise ValueError("Study %s does not have any preprocessed data") + raise ValueError("Study %s does not have any preprocessed data", + study_id) if ppd_id: if ppd_id not in ppds: diff --git a/scripts/qiita b/scripts/qiita index 2c0317da2..5f4641f71 100755 --- a/scripts/qiita +++ b/scripts/qiita @@ -294,6 +294,13 @@ def update_preprocessed_data(sl_out_dir, study, preprocessed_data): preprocessed_data) click.echo("Preprocessed data %s successfully updated" % ppd.id) + +@db.command() +def update_raw_data(): + """""" + rd = update_raw_data_from_cmd() + click.echo("Raw data %s successfully updated" % rd.id) + # ############################################################################# # PORTAL COMMANDS # ############################################################################# From 14f75256700c9c6b658e34412008a58a2c65cc66 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 11:37:12 -0700 Subject: [PATCH 05/12] Fixing bug in the command --- qiita_db/commands.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index b062cb2cf..0533305ff 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -18,7 +18,7 @@ from .study import Study, StudyPerson from .user import User from .util import (get_filetypes, get_filepath_types, compute_checksum, - convert_to_id) + convert_to_id, move_filepaths_to_upload_folder) from .data import RawData, PreprocessedData, ProcessedData from .metadata_template import (SampleTemplate, PrepTemplate, load_template_to_dataframe) @@ -355,7 +355,12 @@ def update_raw_data_from_cmd(filepaths, filepath_types, study_id, rd_id=None): filepath_types_dict = get_filepath_types() filepath_types = [filepath_types_dict[x] for x in filepath_types] - raw_data.clear_filepaths() + fps = raw_data.get_filepaths() + sql = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s" + TRN.add(sql, [raw_data.id]) + TRN.execute() + move_filepaths_to_upload_folder(study_id, fps) + raw_data.add_filepaths(list(zip(filepaths, filepath_types))) return raw_data From 
42f1e3255132c81c50325fd72169d059487c8500 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 11:37:26 -0700 Subject: [PATCH 06/12] Adding tests --- qiita_db/test/test_commands.py | 103 ++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 2 deletions(-) diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py index 4d161ff07..0fdaa42e5 100644 --- a/qiita_db/test/test_commands.py +++ b/qiita_db/test/test_commands.py @@ -14,6 +14,7 @@ from future.utils.six import StringIO from future import standard_library from functools import partial +from operator import itemgetter import pandas as pd @@ -23,13 +24,14 @@ load_processed_data_cmd, load_preprocessed_data_from_cmd, load_parameters_from_cmd, + update_raw_data_from_cmd, update_preprocessed_data_from_cmd) from qiita_db.environment_manager import patch from qiita_db.study import Study, StudyPerson from qiita_db.user import User -from qiita_db.data import PreprocessedData +from qiita_db.data import PreprocessedData, RawData from qiita_db.util import (get_count, check_count, get_db_files_base_dir, - get_mountpoint) + get_mountpoint, compute_checksum) from qiita_db.metadata_template import PrepTemplate from qiita_core.util import qiita_test_checker from qiita_ware.processing_pipeline import generate_demux_file @@ -452,6 +454,103 @@ def test_python_patch(self): self._assert_current_patch('10.sql') +@qiita_test_checker() +class TestUpdateRawDataFromCmd(TestCase): + def setUp(self): + fd, seqs_fp = mkstemp(suffix='_seqs.fastq') + close(fd) + fd, barcodes_fp = mkstemp(suffix='_barcodes.fastq') + close(fd) + self.filepaths = [seqs_fp, barcodes_fp] + self.checksums = [] + for fp in sorted(self.filepaths): + with open(fp, 'w') as f: + f.write("%s\n" % fp) + self.checksums.append(compute_checksum(fp)) + self.filepaths_types = ["raw_forward_seqs", "raw_barcodes"] + self._clean_up_files = [seqs_fp, barcodes_fp] + + info = { + "timeseries_type_id": 1, + "metadata_complete": True, + "mixs_compliant": True, + "number_samples_collected": 25, + "number_samples_promised": 28, + "study_alias": "FCM", + "study_description": "Microbiome of people who eat nothing but " + "fried chicken", + "study_abstract": "Exploring how a high fat diet changes the " + "gut microbiome", + "emp_person_id": StudyPerson(2), + "principal_investigator_id": StudyPerson(3), + "lab_person_id": StudyPerson(1) + } + self.new_study = Study.create(User("test@foo.bar"), + "Update raw data test", + efo=[1], info=info) + self.study = Study(1) + # The files for the RawData object attached to study 1 does not exist. 
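A brief aside on how the new test verifies the update: it records a checksum per input file up front and compares checksums again after the files have been attached to the raw data. The patch uses qiita_db.util.compute_checksum; the sketch below assumes an MD5-style digest only to show the shape of that before/after check, and the file names are placeholders.

import hashlib

def md5_checksum(path):
    # Stand-in for qiita_db.util.compute_checksum: any stable digest of the
    # file contents works for a before/after comparison.
    with open(path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()

# Placeholder input files, mirroring the seqs/barcodes pair in the test
inputs = ['example_seqs.fastq', 'example_barcodes.fastq']
for fp in inputs:
    with open(fp, 'w') as f:
        f.write('%s\n' % fp)

expected = {fp: md5_checksum(fp) for fp in inputs}
# After the update runs, recompute the digests on the files now attached to
# the raw data and assert each one still equals the entry in `expected`.
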
+ # Create them so we can actually perform the tests + for _, fp, _ in RawData(1).get_filepaths(): + with open(fp, 'w') as f: + f.write('\n') + self._clean_up_files.append(fp) + + def tearDown(self): + for f in self._clean_up_files: + if exists(f): + remove(f) + + def test_update_raw_data_from_cmd_diff_length(self): + with self.assertRaises(ValueError): + update_raw_data_from_cmd(self.filepaths[1:], self.filepaths_types, + self.study.id) + with self.assertRaises(ValueError): + update_raw_data_from_cmd(self.filepaths, self.filepaths_types[1:], + self.study.id) + + def test_update_raw_data_from_cmd_no_raw_data(self): + with self.assertRaises(ValueError): + update_raw_data_from_cmd(self.filepaths, self.filepaths_types, + self.new_study.id) + + def test_update_raw_data_from_cmd_wrong_raw_data_id(self): + # Using max(raw_data_ids) + 1 to make sure that the raw data id + # passed does not belong to the study + with self.assertRaises(ValueError): + update_raw_data_from_cmd(self.filepaths, self.filepaths_types, + self.study.id, + max(self.study.raw_data()) + 1) + + def test_update_raw_data_from_cmd(self): + rd = update_raw_data_from_cmd(self.filepaths, self.filepaths_types, + self.study.id) + # Make sure that we are cleaning the environment + for _, fp, _ in rd.get_filepaths(): + self._clean_up_files.append(fp) + + # The checkums are in filepath order. If we sort the rd.get_filepath() + # result by the filepath (itemgetter(1)) we will get them in the same + # order, so the checksums will not fail + for obs, exp in zip(sorted(rd.get_filepaths(), key=itemgetter(1)), + self.checksums): + self.assertEqual(compute_checksum(obs[1]), exp) + + def test_update_raw_data_from_cmd_rd_id(self): + rd = update_raw_data_from_cmd(self.filepaths, self.filepaths_types, + self.study.id, self.study.raw_data()[0]) + # Make sure that we are cleaning the environment + for _, fp, _ in rd.get_filepaths(): + self._clean_up_files.append(fp) + + # The checkums are in filepath order. If we sort the rd.get_filepath() + # result by the filepath (itemgetter(1)) we will get them in the same + # order, so the checksums will not fail + for obs, exp in zip(sorted(rd.get_filepaths(), key=itemgetter(1)), + self.checksums): + self.assertEqual(compute_checksum(obs[1]), exp) + + @qiita_test_checker() class TestUpdatePreprocessedDataFromCmd(TestCase): def setUp(self): From 47f9286e9be716be610dd9865619aa8d8fc499f6 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 11:42:52 -0700 Subject: [PATCH 07/12] Finishing up CLI --- scripts/qiita | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/scripts/qiita b/scripts/qiita index 5f4641f71..aefd5f0fe 100755 --- a/scripts/qiita +++ b/scripts/qiita @@ -30,6 +30,7 @@ from qiita_db.commands import (load_sample_template_from_cmd, load_preprocessed_data_from_cmd, load_prep_template_from_cmd, load_parameters_from_cmd, SUPPORTED_PARAMS, + update_raw_data_from_cmd, update_preprocessed_data_from_cmd) from qiita_db.portal import Portal from qiita_db.sql_connection import SQLConnectionHandler @@ -278,6 +279,25 @@ def load_parameters(fp, table, name): % (table, param.id)) +@db.command() +@click.option('--fp', required=True, type=click.Path(resolve_path=True, + readable=True, exists=True), multiple=True, + help='Path to the raw data file. This option can be used ' + 'multiple times if there are multiple raw data files.') +@click.option('--fp_type', required=True, multiple=True, help='Describes the ' + 'contents of the file. 
Pass one fp_type per fp.', + type=click.Choice(get_filepath_types().keys())) +@click.option('--study', required=True, type=int, + help='Study whose raw data will be updated') +@click.option('--raw_data', required=False, type=int, + help='Raw data to be updated. If not passed, the raw data with ' + 'lowest id in the study will be updated.') +def update_raw_data(fp, fp_type, study, raw_data): + """Updates the raw data with the provided raw data files""" + rd = update_raw_data_from_cmd(fp, fp_type, study, rd_id=raw_data) + click.echo("Raw data %s successfully updated" % rd.id) + + @db.command() @click.argument('sl_out_dir', required=True, type=click.Path(resolve_path=True, readable=True, exists=True, @@ -294,13 +314,6 @@ def update_preprocessed_data(sl_out_dir, study, preprocessed_data): preprocessed_data) click.echo("Preprocessed data %s successfully updated" % ppd.id) - -@db.command() -def update_raw_data(): - """""" - rd = update_raw_data_from_cmd() - click.echo("Raw data %s successfully updated" % rd.id) - # ############################################################################# # PORTAL COMMANDS # ############################################################################# From c7a7ad46bcfff90812c55678280286453dada5a0 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 15:34:54 -0700 Subject: [PATCH 08/12] Addressing comments --- qiita_db/commands.py | 4 ++-- scripts/qiita | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index 0533305ff..96199827e 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -334,13 +334,13 @@ def update_raw_data_from_cmd(filepaths, filepath_types, study_id, rd_id=None): If rd_id is provided and it does not belong to the given study """ if len(filepaths) != len(filepath_types): - raise ValueError("Please provide exactly one filepath_type for each" + raise ValueError("Please provide exactly one filepath_type for each " "and every filepath") with TRN: study = Study(study_id) raw_data_ids = study.raw_data() if not raw_data_ids: - raise ValueError("Study %s does not have any raw data" % study_id) + raise ValueError("Study %d does not have any raw data" % study_id) if rd_id: if rd_id not in raw_data_ids: diff --git a/scripts/qiita b/scripts/qiita index aefd5f0fe..5e2af44f4 100755 --- a/scripts/qiita +++ b/scripts/qiita @@ -287,9 +287,9 @@ def load_parameters(fp, table, name): @click.option('--fp_type', required=True, multiple=True, help='Describes the ' 'contents of the file. Pass one fp_type per fp.', type=click.Choice(get_filepath_types().keys())) -@click.option('--study', required=True, type=int, +@click.option('--study', required=True, type=click.IntRange(1), help='Study whose raw data will be updated') -@click.option('--raw_data', required=False, type=int, +@click.option('--raw_data', required=False, type=click.IntRange(1), help='Raw data to be updated. 
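To make the intended invocation concrete, the same operation the new CLI command wraps is available from Python through update_raw_data_from_cmd, roughly as sketched below. The file names are placeholders; the filepath types shown (raw_forward_seqs, raw_barcodes) are the ones exercised by the tests, and on the command line each file would instead be passed with --fp and its type with --fp_type, together with --study and optionally --raw_data.

from qiita_db.commands import update_raw_data_from_cmd

# Placeholder paths to already-uploaded files; one filepath type per filepath
rd = update_raw_data_from_cmd(
    ['seqs.fastq', 'barcodes.fastq'],
    ['raw_forward_seqs', 'raw_barcodes'],
    study_id=1)  # rd_id omitted: the study's lowest raw data id is updated
print("Raw data %s successfully updated" % rd.id)
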
If not passed, the raw data with ' 'lowest id in the study will be updated.') def update_raw_data(fp, fp_type, study, raw_data): From 0f8ecf72a7aa1c8db6ec54493dc2ab741992b703 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 15:36:17 -0700 Subject: [PATCH 09/12] Adding note to the CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d7504539..5f0c34062 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Version 0.2.0-dev (Changes since version 0.2.0 go here) ------------------------------------------------------- * Users can now change values and add samples and/or columns to sample and prep templates using the Update button (see the prep template and sample template tabs). +* The raw files of a RawData can be now updated using the `qiita db update_raw_data` CLI command. Version 0.2.0 (2015-08-25) -------------------------- From 8e333e523ff4f7c14f2fb48fe5befcf16196c0c7 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 15:55:37 -0700 Subject: [PATCH 10/12] Adding specific error instead of a general KeyError --- qiita_db/commands.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/qiita_db/commands.py b/qiita_db/commands.py index 96199827e..d833e3bcd 100644 --- a/qiita_db/commands.py +++ b/qiita_db/commands.py @@ -353,7 +353,15 @@ def update_raw_data_from_cmd(filepaths, filepath_types, study_id, rd_id=None): raw_data = RawData(sorted(raw_data_ids)[0]) filepath_types_dict = get_filepath_types() - filepath_types = [filepath_types_dict[x] for x in filepath_types] + try: + filepath_types = [filepath_types_dict[x] for x in filepath_types] + except KeyError: + supported_types = filepath_types_dict.keys() + unsupported_types = set(filepath_types).difference(supported_types) + raise ValueError( + "Some filepath types provided are not recognized (%s). 
" + "Please choose from: %s" + % (', '.join(unsupported_types), ', '.join(supported_types))) fps = raw_data.get_filepaths() sql = "DELETE FROM qiita.raw_filepath WHERE raw_data_id = %s" From da2f05f910844e8197e354535b85be90807103c6 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Wed, 30 Sep 2015 16:01:05 -0700 Subject: [PATCH 11/12] Cleaning up the environment --- qiita_db/test/test_commands.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/qiita_db/test/test_commands.py b/qiita_db/test/test_commands.py index 0fdaa42e5..37fdada7f 100644 --- a/qiita_db/test/test_commands.py +++ b/qiita_db/test/test_commands.py @@ -31,7 +31,8 @@ from qiita_db.user import User from qiita_db.data import PreprocessedData, RawData from qiita_db.util import (get_count, check_count, get_db_files_base_dir, - get_mountpoint, compute_checksum) + get_mountpoint, compute_checksum, + get_files_from_uploads_folders) from qiita_db.metadata_template import PrepTemplate from qiita_core.util import qiita_test_checker from qiita_ware.processing_pipeline import generate_demux_file @@ -496,7 +497,15 @@ def setUp(self): f.write('\n') self._clean_up_files.append(fp) + self.uploaded_files = get_files_from_uploads_folders( + str(self.study.id)) + def tearDown(self): + new_uploaded_files = get_files_from_uploads_folders(str(self.study.id)) + new_files = set(new_uploaded_files).difference(self.uploaded_files) + path_builder = partial(join, get_mountpoint("uploads")[0][1], '1') + for _, fp in new_files: + self._clean_up_files.append(path_builder(fp)) for f in self._clean_up_files: if exists(f): remove(f) From 2a589bd060cf763a5a960f1425781ad5ec1b1019 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 8 Oct 2015 15:20:42 -0700 Subject: [PATCH 12/12] Fixing error when updating a sample template with a df with less samples than in the DB - and adding a specific test --- .../metadata_template/base_metadata_template.py | 12 ++++-------- .../metadata_template/test/test_sample_template.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index d5b61079a..f027f6aad 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -1160,19 +1160,15 @@ def update(self, md_template): % ', '.join(columns_diff)) # In order to speed up some computation, let's compare only the - # common columns. current_map.columns is a superset of - # new_map.columns, so this will not fail - current_map = current_map[new_map.columns] + # common columns and rows. current_map.columns and + # current_map.index are supersets of new_map.columns and + # new_map.index, respectivelly, so this will not fail + current_map = current_map[new_map.columns].loc[new_map.index] # Get the values that we need to change # diff_map is a DataFrame that hold boolean values. If a cell is # True, means that the new_map is different from the current_map # while False means that the cell has the same value - # In order to compare them, they've to be identically labeled, so - # we need to sort the 'index' axis to be identically labeled. 
The - # 'column' axis is already the same given the previous line of code - current_map.sort_index(axis='index', inplace=True) - new_map.sort_index(axis='index', inplace=True) diff_map = current_map != new_map # ne_stacked holds a MultiIndexed DataFrame in which the first # level of indexing is the sample_name and the second one is the diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index d49b97153..a3df6790f 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -1301,6 +1301,19 @@ def test_update(self): with self.assertRaises(QiitaDBError): st.update(self.metadata_dict_updated_column_error) + def test_update_fewer_samples(self): + """Updates using a dataframe with less samples that in the DB""" + st = SampleTemplate.create(self.metadata, self.new_study) + new_metadata = pd.DataFrame.from_dict( + {'Sample1': {'physical_specimen_location': 'CHANGE'}}, + orient='index') + exp = {s_id: st[s_id]._to_dict() for s_id in st} + s_id = '%d.Sample1' % self.new_study.id + exp[s_id]['physical_specimen_location'] = 'CHANGE' + npt.assert_warns(QiitaDBWarning, st.update, new_metadata) + obs = {s_id: st[s_id]._to_dict() for s_id in st} + self.assertEqual(obs, exp) + def test_update_numpy(self): """Update values in existing mapping file with numpy values""" metadata_dict = {