[MISC] Switch from uppercase seqan3::field names to lower case.

seqan · Dec 12, 2019 · c582635 · c582635
1 parent 752fad9
commit c582635
Show file tree

Hide file tree

Showing 87 changed files with 1,113 additions and 1,075 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,8 +36,8 @@ If possible, provide tooling that performs the changes, e.g. a shell-script.
 #### I/O
 
 * Asynchronous input (background file reading) supported via seqan3::view::async_input_buffer.
-* Reading field::CIGAR into a vector over seqan3::cigar is supported via seqan3::alignment_file_input.
-* Writing field::CIGAR into a vector over seqan3::cigar is supported via seqan3::alignment_file_output.
+* Reading field::cigar into a vector over seqan3::cigar is supported via seqan3::alignment_file_input.
+* Writing field::cigar into a vector over seqan3::cigar is supported via seqan3::alignment_file_output.
 
 ## API changes
 
@@ -64,7 +64,9 @@ If possible, provide tooling that performs the changes, e.g. a shell-script.
 
 * The field-based in- and output interface for structure files through std::get and std::tie has been removed.
  Output can instead be achieved with seqan3::views::zip(); for input we will implement unzip() in the future.
-* The `field::FLAG` of SAM/BAM input and output is now an **enum** instead of a simple integer (see seqan3::sam_flag).
+* The `field::flag` of SAM/BAM input and output is now an **enum** instead of a simple integer (see seqan3::sam_flag).
+* Uppercase seqan3::field names are deprecated. Use the lower case field names instead. You can easily find and replace
+  all occurrences by the following regex: find `field::([A-Z_]+)` replace `field::\L$1`.
 
 #### Range
 

diff --git a/doc/tutorial/alignment_file/alignment_file_read_cigar.cpp b/doc/tutorial/alignment_file/alignment_file_read_cigar.cpp
@@ -36,7 +36,7 @@ int main()
 {
     std::filesystem::path tmp_dir = std::filesystem::temp_directory_path(); // get the temp directory
 
-    alignment_file_input fin{tmp_dir/"my.sam", fields<field::CIGAR>{}};
+    alignment_file_input fin{tmp_dir/"my.sam", fields<field::cigar>{}};
 
     for (auto & [cigar] : fin)
         debug_stream << cigar << std::endl;

diff --git a/doc/tutorial/alignment_file/alignment_file_snippets.cpp b/doc/tutorial/alignment_file/alignment_file_snippets.cpp
@@ -54,7 +54,7 @@ int main()
 //![writing]
     auto filename = std::filesystem::temp_directory_path()/"out.sam";
 
-    alignment_file_output fout{filename, fields<field::FLAG, field::MAPQ>{}};
+    alignment_file_output fout{filename, fields<field::flag, field::mapq>{}};
 
     size_t mymapq{0};
     seqan3::sam_flag flag{seqan3::sam_flag::unmapped};
@@ -89,7 +89,7 @@ int main()
 //![read_custom_fields]
     auto filename = std::filesystem::temp_directory_path()/"example.sam";
 
-    alignment_file_input fin{filename, fields<field::ID, field::SEQ, field::FLAG>{}};
+    alignment_file_input fin{filename, fields<field::id, field::seq, field::flag>{}};
 
     for (auto & [id, seq, flag /*order!*/] : fin)
     {
@@ -104,7 +104,7 @@ int main()
 //![alignments_without_ref]
     auto filename = std::filesystem::temp_directory_path()/"example.sam";
 
-    alignment_file_input fin{filename, fields<field::ID, field::ALIGNMENT>{}};
+    alignment_file_input fin{filename, fields<field::id, field::alignment>{}};
 
     for (auto & [ id, alignment ] : fin)
     {
@@ -120,7 +120,7 @@ int main()
     std::vector<std::string> ref_ids{"ref"}; // list of one reference name
     std::vector<dna5_vector> ref_sequences{"AGAGTTCGAGATCGAGGACTAGCGACGAGGCAGCGAGCGATCGAT"_dna5};
 
-    alignment_file_input fin{filename, ref_ids, ref_sequences, fields<field::ALIGNMENT>{}};
+    alignment_file_input fin{filename, ref_ids, ref_sequences, fields<field::alignment>{}};
 
     for (auto & [ alignment ] : fin)
     {

diff --git a/doc/tutorial/alignment_file/alignment_file_solution1.cpp b/doc/tutorial/alignment_file/alignment_file_solution1.cpp
@@ -38,12 +38,12 @@ int main()
 {
     std::filesystem::path tmp_dir = std::filesystem::temp_directory_path(); // get the temp directory
 
-    alignment_file_input fin{tmp_dir/"my.sam", fields<field::MAPQ>{}};
+    alignment_file_input fin{tmp_dir/"my.sam", fields<field::mapq>{}};
 
     double sum{};
     size_t c{};
 
-    std::ranges::for_each(fin.begin(), fin.end(), [&sum, &c] (auto & rec) { sum += get<field::MAPQ>(rec); ++c; });
+    std::ranges::for_each(fin.begin(), fin.end(), [&sum, &c] (auto & rec) { sum += get<field::mapq>(rec); ++c; });
 
     debug_stream << "Average: " << (sum/c) << std::endl;
 }

diff --git a/doc/tutorial/alignment_file/alignment_file_solution2.cpp b/doc/tutorial/alignment_file/alignment_file_solution2.cpp
@@ -79,15 +79,15 @@ int main()
 
     // read in reference information
     sequence_file_input<my_traits> reference_file{tmp_dir/"reference.fasta"};
-    concatenated_sequences<std::string> ref_ids = get<field::ID>(reference_file);
-    std::vector<std::vector<dna5>> ref_seqs = get<field::SEQ>(reference_file);
+    concatenated_sequences<std::string> ref_ids = get<field::id>(reference_file);
+    std::vector<std::vector<dna5>> ref_seqs = get<field::seq>(reference_file);
 
     alignment_file_input mapping_file{tmp_dir/"mapping.sam",
                                       ref_ids,
                                       ref_seqs,
-                                      fields<field::ID,field::REF_ID, field::MAPQ, field::ALIGNMENT>{}};
+                                      fields<field::id,field::ref_id, field::mapq, field::alignment>{}};
 
-    auto mapq_filter = std::views::filter([] (auto & rec) { return get<field::MAPQ>(rec) >= 30; });
+    auto mapq_filter = std::views::filter([] (auto & rec) { return get<field::mapq>(rec) >= 30; });
 
     for (auto & [id, ref_id, mapq, alignment] : mapping_file | mapq_filter)
     {

diff --git a/doc/tutorial/alignment_file/alignment_file_solution3.cpp b/doc/tutorial/alignment_file/alignment_file_solution3.cpp
@@ -10,7 +10,7 @@ int main()
     std::vector<std::vector<dna4>> seqs = {"ACGATCGACTAGCTACGATCAGCTAGCAG"_dna4, "AGAAAGAGCGAGGCTATTTTAGCGAGTTA"_dna4};
 
     auto tmp_dir = std::filesystem::temp_directory_path();
-    alignment_file_output fout{tmp_dir/"my.sam", fields<field::ID, field::SEQ>{}};
+    alignment_file_output fout{tmp_dir/"my.sam", fields<field::id, field::seq>{}};
 
     for (size_t i = 0; i < ids.size(); ++i)
     {

diff --git a/doc/tutorial/alignment_file/index.md b/doc/tutorial/alignment_file/index.md
@@ -58,22 +58,22 @@ with the only difference that **the header is mandatory**.
 
 The Alignment file abstraction supports writing the following fields:
 
-1. field::SEQ
-2. field::ID
-3. field::OFFSET
-4. field::REF_SEQ
-5. field::REF_ID
-6. field::REF_OFFSET
-7. field::ALIGNMENT
-8. field::MAPQ
-9. field::FLAG
-10. field::QUAL
-11. field::MATE
-12. field::TAGS
-13. field::EVALUE
-14. field::BIT_SCORE
-
-There is an additional field called seqan3::field::HEADER_PTR.
+1. field::seq
+2. field::id
+3. field::offset
+4. field::ref_seq
+5. field::ref_id
+6. field::ref_offset
+7. field::alignment
+8. field::mapq
+9. field::flag
+10. field::qual
+11. field::mate
+12. field::tags
+13. field::evalue
+14. field::bit_score
+
+There is an additional field called seqan3::field::header_ptr.
 It is used to transfer header information from seqan3::alignment_file_input to seqan3::alignment_file_output,
 but you needn't deal with this field manually.
 
@@ -82,17 +82,17 @@ To make things clearer, here is the table of SAM columns connected to the corres
 
 | #  | SAM Column ID |  FIELD name                                                                       |
 |:--:|:--------------|:----------------------------------------------------------------------------------|
-| 1  | QNAME         | seqan3::field::ID                                                                 |
-| 2  | FLAG          | seqan3::field::FLAG                                                               |
-| 3  | RNAME         | seqan3::field::REF_ID                                                             |
-| 4  | POS           | seqan3::field::REF_OFFSET                                                         |
-| 5  | MAPQ          | seqan3::field::MAPQ                                                               |
-| 6  | CIGAR         | implicitly stored in seqan3::field::ALIGNMENT or directly in seqan3::field::CIGAR |
-| 7  | RNEXT         | seqan3::field::MATE (tuple pos 0)                                                 |
-| 8  | PNEXT         | seqan3::field::MATE (tuple pos 1)                                                 |
-| 9  | TLEN          | seqan3::field::MATE (tuple pos 2)                                                 |
-| 10 | SEQ           | seqan3::field::SEQ                                                                |
-| 11 | QUAL          | seqan3::field::QUAL                                                               |
+| 1  | QNAME         | seqan3::field::id                                                                 |
+| 2  | FLAG          | seqan3::field::flag                                                               |
+| 3  | RNAME         | seqan3::field::ref_id                                                             |
+| 4  | POS           | seqan3::field::ref_offset                                                         |
+| 5  | MAPQ          | seqan3::field::mapq                                                               |
+| 6  | CIGAR         | implicitly stored in seqan3::field::alignment or directly in seqan3::field::cigar |
+| 7  | RNEXT         | seqan3::field::mate (tuple pos 0)                                                 |
+| 8  | PNEXT         | seqan3::field::mate (tuple pos 1)                                                 |
+| 9  | TLEN          | seqan3::field::mate (tuple pos 2)                                                 |
+| 10 | SEQ           | seqan3::field::seq                                                                |
+| 11 | QUAL          | seqan3::field::qual                                                               |
 
 ## File extensions
 
@@ -149,7 +149,7 @@ Note that this is possible for all SeqAn file objects.
 Let's assume we want to compute the average mapping quality of a SAM file.
 
 For this purpose, write a small program that
-    * only reads the mapping quality (field::MAPQ) out of a SAM file and
+    * only reads the mapping quality (field::mapq) out of a SAM file and
     * computes the average of all qualities.
 
 Use the following file to test your program:
@@ -246,7 +246,7 @@ With those information do the following:
   * For the resulting alignments, print which read was mapped against which reference id and
     the number of seqan3::gap's in each sequence (aligned reference and read sequence).
 
-\note reference ids (field::REF_ID) are given as an index of type `std::optional<int32_t>`
+\note reference ids (field::ref_id) are given as an index of type `std::optional<int32_t>`
       that denote the position of the reference id in the `ref_ids` vector passed to the alignment file.
 
 Your program should print the following:
@@ -268,7 +268,7 @@ r004 mapped against 1 with 0 gaps in the read sequence and 0 gaps in the referen
 ## Reading the CIGAR string
 
 If you are accustomed to the raw CIGAR information, you can also read the CIGAR information into a
-`std::vector<seqan3::cigar>` if you specify the `seqan3::field::CIGAR`.
+`std::vector<seqan3::cigar>` if you specify the `seqan3::field::cigar`.
 
 \snippet doc/tutorial/alignment_file/alignment_file_read_cigar.cpp code
 

diff --git a/doc/tutorial/read_mapper/read_mapper_step4.cpp b/doc/tutorial/read_mapper/read_mapper_step4.cpp
@@ -50,13 +50,13 @@ void map_reads(std::filesystem::path const & query_path,
     sequence_file_input query_in{query_path};
 
 //! [alignment_file_output]
-    alignment_file_output sam_out{sam_path, fields<field::SEQ,
-                                                   field::ID,
-                                                   field::REF_ID,
-                                                   field::REF_OFFSET,
-                                                   field::ALIGNMENT,
-                                                   field::QUAL,
-                                                   field::MAPQ>{}};
+    alignment_file_output sam_out{sam_path, fields<field::seq,
+                                                   field::id,
+                                                   field::ref_id,
+                                                   field::ref_offset,
+                                                   field::alignment,
+                                                   field::qual,
+                                                   field::mapq>{}};
 //! [alignment_file_output]
 
     configuration const search_config = search_cfg::max_error{search_cfg::total{errors}} |

diff --git a/doc/tutorial/sequence_file/index.md b/doc/tutorial/sequence_file/index.md
@@ -52,7 +52,7 @@ hoping that it will make the following tutorial easier to understand.
 As mentioned above, our file object is a range over records.
 More specifically over objects of type seqan3::record which is basically just a std::tuple that holds the data.
 To identify or specialise which data is read/written and contained in the records,
-we use seqan3::field tags (e.g. seqan3::field::SEQ denotes sequence information).
+we use seqan3::field tags (e.g. seqan3::field::seq denotes sequence information).
 The seqan3::field tags are shared between file formats and allow for easy file conversion.
 
 Output files can handle various types that fulfill the requirements of the format (e.g.
@@ -142,10 +142,10 @@ You can also customise this list if you want to allow different or additional fi
 
 The Sequence file abstraction supports reading four different fields:
 
-  1. seqan3::field::SEQ
-  2. seqan3::field::ID
-  3. seqan3::field::QUAL
-  4. seqan3::field::SEQ_QUAL
+  1. seqan3::field::seq
+  2. seqan3::field::id
+  3. seqan3::field::qual
+  4. seqan3::field::seq_qual
 
 The first three fields are retrieved by default (and in that order!).
 The last field may be selected to directly store sequence and qualities in a more memory-efficient
@@ -377,8 +377,8 @@ These work similarly to how they work on an std::vector.
 
 If you pass a tuple to `push_back()` or give arguments to `emplace_back()` the order of elements is assumed
 to be the same as the one in the seqan3::sequence_file_output::selected_field_ids.
-For the above example the default FASTA fields are first seqan3::field::SEQ,
-second seqan3::field::ID and the third one seqan3::field::QUAL.
+For the above example the default FASTA fields are first seqan3::field::seq,
+second seqan3::field::id and the third one seqan3::field::qual.
 You may give fewer fields than are selected if the actual format you are writing to can cope with fewer
 (e.g. for FastA it is sufficient to give sequence and name information).
 

diff --git a/doc/tutorial/sequence_file/sequence_file_snippets.cpp b/doc/tutorial/sequence_file/sequence_file_snippets.cpp
@@ -130,7 +130,7 @@ sequence_file_input fin2{std::filesystem::temp_directory_path()/"my.fastq"}; //
 
 for (auto && [rec1, rec2] : views::zip(fin1, fin2)) // && is important! because views::zip returns temporaries
 {
-    if (get<field::ID>(rec1) != get<field::ID>(rec2))
+    if (get<field::id>(rec1) != get<field::id>(rec2))
         throw std::runtime_error("Oh oh your pairs don't match.");
 }
 //![paired_reads]
@@ -144,7 +144,7 @@ for (auto && records : fin | ranges::view::chunk(10)) // && is important! becaus
 {
     // `records` contains 10 elements (or less at the end)
     debug_stream << "Taking the next 10 sequences:\n";
-    debug_stream << "ID:  " << get<field::ID>(*records.begin()) << '\n'; // prints first ID in batch
+    debug_stream << "ID:  " << get<field::id>(*records.begin()) << '\n'; // prints first ID in batch
 }
 //![read_in_batches]
 }
@@ -156,14 +156,14 @@ sequence_file_input fin{std::filesystem::temp_directory_path()/"my.fastq"};
 // std::views::filter takes a function object (a lambda in this case) as input that returns a boolean
 auto minimum_quality_filter = std::views::filter([] (auto const & rec)
 {
-    auto qual = get<field::QUAL>(rec) | std::views::transform([] (auto q) { return q.to_phred(); });
+    auto qual = get<field::qual>(rec) | std::views::transform([] (auto q) { return q.to_phred(); });
     double sum = ranges::accumulate(qual.begin(), qual.end(), 0);
     return sum / std::ranges::size(qual) >= 40; // minimum average quality >= 40
 });
 
 for (auto & rec : fin | minimum_quality_filter)
 {
-    debug_stream << "ID: " << get<field::ID>(rec) << '\n';
+    debug_stream << "ID: " << get<field::id>(rec) << '\n';
 }
 //![quality_filter]
 }

diff --git a/doc/tutorial/sequence_file/sequence_file_solution1.cpp b/doc/tutorial/sequence_file/sequence_file_solution1.cpp
@@ -57,9 +57,9 @@ int main()
 
     for (auto & rec : fin)
     {
-        debug_stream << "ID:  "  << get<field::ID>(rec) << '\n';
-        debug_stream << "SEQ: "  << get<field::SEQ>(rec) << '\n';
-        debug_stream << "QUAL: " << get<field::QUAL>(rec) << '\n';
+        debug_stream << "ID:  "  << get<field::id>(rec) << '\n';
+        debug_stream << "SEQ: "  << get<field::seq>(rec) << '\n';
+        debug_stream << "QUAL: " << get<field::qual>(rec) << '\n';
     }
 }
 //![solution]
diff --git a/doc/tutorial/sequence_file/sequence_file_solution3.cpp b/doc/tutorial/sequence_file/sequence_file_solution3.cpp
@@ -67,21 +67,21 @@ int main()
 
     auto length_filter = std::views::filter([] (auto const & rec)
     {
-        return std::ranges::size(get<field::SEQ>(rec)) >= 5;
+        return std::ranges::size(get<field::seq>(rec)) >= 5;
     });
 
     // you can use a for loop
 
     // for (auto & rec : fin | length_filter | std::views::take(2))
     // {
-    //     debug_stream << "ID: " << get<field::ID>(rec) << '\n';
+    //     debug_stream << "ID: " << get<field::id>(rec) << '\n';
     // }
 
     // But you can also do this to retrieve all IDs into a vector:
     std::vector<std::string> ids = fin
                                  | length_filter                                // apply length filter
                                  | std::views::take(2)                          // take first two records
-                                 | views::get<field::ID>                        // select only ID from record
+                                 | views::get<field::id>                        // select only ID from record
                                  | views::convert<std::string &&>               // mark ID to be moved out of record
                                  | views::to<std::vector<std::string>>;         // convert to container
     // Note that you need to know the type of id (std::string)

diff --git a/doc/tutorial/sequence_file/sequence_file_solution4.cpp b/doc/tutorial/sequence_file/sequence_file_solution4.cpp
@@ -75,7 +75,7 @@ int main()
 
     auto length_filter = std::views::filter([] (auto const & rec)
     {
-        return std::ranges::size(get<field::SEQ>(rec)) >= 5;
+        return std::ranges::size(get<field::seq>(rec)) >= 5;
     });
 
     for (auto & rec : fin | length_filter)

diff --git a/doc/tutorial/sequence_file/sequence_file_solution5.cpp b/doc/tutorial/sequence_file/sequence_file_solution5.cpp
@@ -76,7 +76,7 @@ int main()
 
     auto length_filter = std::views::filter([] (auto & rec)
     {
-        return std::ranges::size(get<field::SEQ>(rec)) >= 5;
+        return std::ranges::size(get<field::seq>(rec)) >= 5;
     });
 
     fout = fin | length_filter;