diff --git a/Makefile b/Makefile index 2d6bee63a..b1923a96f 100644 --- a/Makefile +++ b/Makefile @@ -66,4 +66,14 @@ clean: mostlyclean -rm -rf .jekyll-cache .jekyll-metadata _site +# Checking of MM tag perl script +check test: + cd test/SAMtags; \ + for i in `echo *.sam | sed 's/\.sam//g'`; \ + do \ + ./parse_mm.pl $$i.sam > _; \ + cmp _ $$i.txt; \ + done; \ + rm _ + .PHONY: all pdf diff diffs show-styles mostlyclean clean diff --git a/SAMtags.tex b/SAMtags.tex index 722fffa6b..cf6b20273 100644 --- a/SAMtags.tex +++ b/SAMtags.tex @@ -471,6 +471,169 @@ \subsubsection{Color space} Color read quality on the original strand of the read. Same encoding as {\sf QUAL}; same length as {\tt CS}. \end{description} +\section{Draft tags} + +These are tags which have been proposed and are broadly accepted to +become standard tags, but a review or probationary period has been +deemed useful. They use the locally-defined tag namespace and +processing software should consider that the tags may have local usage +for other purposes. + +\begin{center}\small +% This table is sorted alphabetically +\begin{longtable}{ccp{12.5cm}} + \hline + {\bf Tag} & {\bf Type} & {\bf Description} \\ + \hline + \endhead + {\tt Mm} & Z & Base modifications / methylation \\ +% {\tt MP} & Z & Base modification qualities \\ + {\tt Ml} & B,C & Base modification probabilities \\ +\end{longtable} +\end{center} + +\subsection{Base modifications} + +Base modifications, including base methylation, are represented as a series of edits from the primary unmodified sequence as originally reported by the sequencing instrument. +This potentially differs to the sequence stored in the main SAM {\sf SEQ} field if the latter has been reverse complemented, in which case SAM {\sf FLAG} 0x10 must be set. +This means modification positions are also recorded against the original orientation (i.e. starting at the 5' end), and count the original base types. + +Each modified base listed also has a quality value associated with it. +Given the unmodified base already has a phred likelihood, this base modification quality should be interpreted as the likelihood of this modification being correct given an assumption the original call is correct. + +\begin{description} +\item[Mm:Z:\tagregex{([ACGTUN][-+]([a-z]+|[0-9]+)(,[0-9]+)*;)*}] +\hfill\\ +The first character is the unmodified ``fundamental'' base as reported +by the sequencing instrument for the top strand. +It must be one of {\tt A}, {\tt C}, {\tt G}, {\tt T}, {\tt U} (if RNA) or {\tt N} for anything else, including any IUPAC ambiguity codes in the reported SEQ field. +Note {\tt N} may be used to match any base rather than specifically an {\tt N} call by the sequencing instrument. +This may be used in situations where the base modification is not a derivation of a standard base type. +This is followed by either plus or minus indicating the strand the modification was observed on (relative to the original sequenced strand of {\sf SEQ} with plus meaning same orientation),\footnote{Hence a tool that may reverse complement sequences does not need to understand how to manipulate the {\tt Mm} and {\tt Ml} tags.} and one or more base modification codes. +This is then followed by a comma separated list of how many unmodified seq bases of the stated base type to skip, stored as a delta to the last and starting with 0 as the first (or next) base, starting from the uncomplemented 5' end of the {\sf SEQ} field. +This number series is comparable to the numbers in an {\tt MD} tag, +albeit counting specific base types only and potentially reverse-complemented. + +For example {\tt C+m,5,12,0;} tells us there are three +5-Methylcytosine bases on the top strand of {\sf SEQ}. +The first 5 {\tt C} bases are unmodified and the 6th is modified, as are the 19th (with 12 between the 6th and 19th) and 20th. +Similarly {\tt G-m,14;} indicates the 15th {\tt G} is a 5-Methylcytosine on the opposite strand (still counting using the top strand base calls from the 5' end). +When the alignment record is reverse complemented (SAM flag 0x10) these two examples do not change since the tag always refers to the as-sequenced orientation. +See the test/SAMtags/MM-orient.sam file for examples. + +This permits modifications to be listed on either strand with the rare potential for both strands to have a modification at the same site. +If SAM FLAG 0x10 is set, indicating that SEQ has been reverse complemented from the sequence observed by the sequencing machine, note that these base modification field values will be in the opposite orientation to SEQ and other derived SAM fields. + +Note it is permitted for the coordinate list to be empty (for example {\tt Mm:Z:C+m;}), which may be used as an explicit indicator that this base modification is not present. +It is not permitted for coordinates to be beyond the length of the sequence. + +When multiple modifications are listed, for example {\tt C+mh,5,12,0;}, it indicates the modification may be any of the stated bases. +The associated confidence values in the {\tt Ml} tag may be used to determine the relative likelihoods between the options. +The example above is equivalent to {\tt C+m,5,12,0;C+h,5,12,0;}, although this will have a different ordering of confidence values in {\tt Ml}. +Note ChEBI codes cannot be used in the multi-modification form (such as the {\tt C+mh} example above). + +If the modification is not one of the standard common types (listed below) it can be specified as a numeric ChEBI code. +For example {\tt C+76792,57;} is the same as {\tt C+h,57;}. + +An unmodified base of {\tt N} means count any base in {\sf SEQ}, not only those of {\tt N}. +Thus {\tt N+n,100;} means the 101st base is Xanthosine (n), irrespective of the sequence composition. + +The standard code types and their associated ChEBI values are listed +below, taken from Viner {\it et al.}% +\footnote{Coby Viner {\it et al.}, \emph{Modeling methyl-sensitive +transcription factor motifs with an expanded epigenetic alphabet}, \url{https://www.biorxiv.org/content/10.1101/043794v1}.} +Additionally ambiguity codes {\tt A}, {\tt C}, {\tt G}, {\tt T} and {\tt U} +exist to represent unspecified modifications bases of their respective +canonical base types, plus code {\tt N} to represent an unspecified +modification of any base type. + +\begin{center} +\begin{tabular}{lllll} +{\bf Unmodified base} & {\bf Code} & {\bf Abbreviation} & {\bf Name} & {\bf ChEBI} \\ +\hline +C & m & 5mC & 5-Methylcytosine & 27551 \\ +C & h & 5hmC & 5-Hydroxymethylcytosine & 76792 \\ +C & f & 5fC & 5-Formylcytosine & 76794 \\ +C & c & 5caC & 5-Carboxylcytosine & 76793 \\ +C & C & & Ambiguity code; any C mod & \\ +\hline +T & g & 5hmU & 5-Hydroxymethyluracil & 16964 \\ +T & e & 5fU & 5-Formyluracil & 80961 \\ +T & b & 5caU & 5-Carboxyluracil & 17477 \\ +T & T & & Ambiguity code; any T mod & \\ +\hline +U & U & & Ambiguity code; any U mod & \\ +\hline +A & a & 6mA & 6-Methyladenine & 28871 \\ +A & A & & Ambiguity code; any A mod & \\ +\hline +G & o & 8oxoG & 8-Oxoguanine & 44605 \\ +G & G & & Ambiguity code; any G mod & \\ +\hline +N & n & Xao & Xanthosine & 18107 \\ +N & N & & Ambiguity code; any mod & \\ +\end{tabular} +\end{center} + +% MP was the former quality score for MM. However being Phred scores +% it can only reasonable record probabilities for highly likely +% events, making it inappropriate for callers (eg ONT's) that wish to +% jointly call probabilities for the entire trained set of +% possibilities. We could use log-odds, similar to how early Illumina +% runs did to record likelihoods for A, C, G and T irrespective of +% call, but for now we're using linear-scaled probabilities. These +% are in the ML tag. +% +% The MP tag is left here for now as the jury is still out on whether +% we'll need it in the future. +% +% \item[MP:Z:\tagvalue{qualities}] +% \hfill\\ +% The optional {\tt MP} tag lists the Phred qualities of each modification listed in the {\tt MM} tag in the order they occur. +% The qualities are encoded in the same manner as the primary {\sf QUAL} field; one byte per quality with ASCII value Phred score + 33. +% A space character (`{\tt \textvisiblespace}') should be used as a separator between concatenated quality strings when multiple modification lists are present in the {\tt MM} tag. +% The length should match the number of position deltas from {\tt MM} plus 1 per space character required. +% +% For example ``{\tt MM:Z:C+m,5,12,3;C+h,57;}'' may have an associated +% quality tag of ``{\tt MP:Z:5EB /}''. +% +% Where multiple modification types are listed together, such as in ``{\tt MM:Z:C+mh,5,12,3;}'' the quality values are interleaved in order ({\tt m} at 6, {\tt h} at 6, {\tt m} at 19, {\tt h} at 19 and so on), giving 6 quality values in total for this example. +% +% Quality values for ambiguity codes give the likelihood that the +% modification is one of the possible codes compatible with that +% ambiguity code. For example {\tt MM:Z:C+C,10; MP:Z:+} indicates a C +% call with an unspecified modification and the phred score of 10 (ASCII +% value {\tt +}). This corresponds to a 90\% chance of the base being +% modified. +% +% To represent several possible modifications at the same site the {\tt MP} tag can be used to indicate the probabilities of each possibility. +% The values used should be absolute probabilities, not relative between the alternatives. +% For example, a C base that has 95\% chance of being modified with 5mC being three times more likely than 5hmC will encode 5mC with 67.5\% probability ($0.9 * 0.75$ giving phred score 5, ASCII value {\tt \&})and 5hmC with 22.5\% probability ($0.9 * 0.25$ giving phred score 1, ASCII value {\tt "}). +% This could be represented with ``{\tt MM:Z:C+m,10;C+h,10; MP:Z:" \&}''. + +\item[Ml:B:C,\tagvalue{scaled-probabilities}] +\hfill\\ +The optional {\tt Ml} tag lists the probability of each modification listed in the {\tt Mm} tag being correct, in the order that they occur. +The continuous probability range 0.0 to 1.0 is remapped in equal +sized portions to the discrete integers 0 to 255 inclusively. Thus the +probability range corresponding to integer value $N$ is $N/256$ to +$(N+1)/256$. + +The SAM encoding therefore uses a byte array of type `{\tt C}' with the number of elements matching the summation of the number of modifications listed as being present in the {\tt Mm} tag accounting for multi-modifications each having their own probability. + +For example ``{\tt Mm:Z:C+m,5,12;C+h,5,12;}'' may have an associated tag of ``{\tt Ml:B:C,204,89,26,130}''. + +If the above is rewritten in the multiple-modification form, the probabilities are interleaved in the order presented, giving ``{\tt Mm:Z:C+mh,5,12; Ml:B:C,204,26,89,130}''. +Note where several possible modifications are presented at the same site, the {\tt Ml} values represent the absolute probabilities of the modification call being correct and not the relative likelihood between the alternatives. +These probabilities should not sum to above 1.0 ($\approx 256$ in integer encoding, allowing for some minor rounding errors), but may sum to a lower total with the remainder representing the probability that none of the listed modification types are present. +In the example used above, the 6th {\tt C} has 80\% chance of being {\tt 5mC}, 10\% chance of being {\tt 5hmC} and 10\% chance of being an unmodified {\tt C}. + +{\tt Ml} values for ambiguity codes give the probability that the modification is one of the possible codes compatible with that ambiguity code. +For example {\tt Mm:Z:C+C,10; Ml:B:C,229} indicates a C call with a probability of 90\% of having some form of unspecified modification. + +\end{description} + + \section{Locally-defined tags} You can freely add new tags. @@ -492,6 +655,9 @@ \section{Tag History} \setlength{\parindent}{0pt} \newcommand*{\gap}{\vspace*{2ex}} +\subsubsection*{July 2021} +Added the Mm and Ml draft tags describing base modifications. + \subsubsection*{March 2020} Transcript strand tag TS added, equivalent to the locally-defined XS tag diff --git a/test/SAMtags/MM-chebi.sam b/test/SAMtags/MM-chebi.sam new file mode 100644 index 000000000..62920ecc1 --- /dev/null +++ b/test/SAMtags/MM-chebi.sam @@ -0,0 +1,2 @@ +@CO Separate m, h and N modifications +* 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+m,2,2,1,4,1;C+76792,6,7;N+n,15; Ml:B:C,102,128,153,179,204,161,187,212,169 diff --git a/test/SAMtags/MM-chebi.txt b/test/SAMtags/MM-chebi.txt new file mode 100644 index 000000000..902265613 --- /dev/null +++ b/test/SAMtags/MM-chebi.txt @@ -0,0 +1,36 @@ +A T +G C +C G +T A +C G +T A +Cm40 G +C G +A T +G C +A T +G C +T A +C G +G C +Nn83 N +A T +Cm50 G +G C +C(76792)63 G +Cm59 G +A T +T A +Y R +C G +G C +C G +G C +C G +G C +C G +Cm70 G +A T +C G +Cm79(76792)73 G +A T diff --git a/test/SAMtags/MM-double.sam b/test/SAMtags/MM-double.sam new file mode 100644 index 000000000..608516fc1 --- /dev/null +++ b/test/SAMtags/MM-double.sam @@ -0,0 +1,3 @@ +@CO Modifications called on both strands of the same record, +@CO including potentially at the same location simultaneously. +* 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:C+m,1,3,0;G-m,0,2,0,4;G+o,4; Ml:B:C,128,153,179,115,141,166,192,102 diff --git a/test/SAMtags/MM-double.txt b/test/SAMtags/MM-double.txt new file mode 100644 index 000000000..27c133fb5 --- /dev/null +++ b/test/SAMtags/MM-double.txt @@ -0,0 +1,36 @@ +A T +G Cm45 +G C +A T +T A +C G +T A +Cm50 G +T A +A T +G C +C G +G Cm55 +Go40 Cm65 +A T +T A +C G +G C +G C +C G +G C +G C +G Cm75 +G C +G C +A T +T A +A T +T A +G C +Cm59 G +Cm70 G +A T +T A +A T +T A diff --git a/test/SAMtags/MM-multi.sam b/test/SAMtags/MM-multi.sam new file mode 100644 index 000000000..b2259a09e --- /dev/null +++ b/test/SAMtags/MM-multi.sam @@ -0,0 +1,7 @@ +@CO Testing multiple m, h and N modifications on the same read. +@CO r1 has them separated out. +@CO r2 has them combined together, for example as produced by +@CO a joint basecaller which assigns probabilities to all +@CO trained events simultaneously. +r1 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r2 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+mh,2,2,0,0,4,1;N+n,15; Ml:B:C,77,159,103,133,128,108,154,82,179,57,204,31,240 diff --git a/test/SAMtags/MM-multi.txt b/test/SAMtags/MM-multi.txt new file mode 100644 index 000000000..a8c61bfc7 --- /dev/null +++ b/test/SAMtags/MM-multi.txt @@ -0,0 +1,73 @@ +A T +G C +C G +T A +C G +T A +Cm50 G +C G +A T +G C +A T +G C +T A +C G +G C +Nn84 N +A T +Cm59 G +Gn93 C +Ch62 G +Cm70 G +A T +T A +Y R +C G +G C +C G +G C +C G +G C +C G +Cm79 G +A T +C G +Cm90h2 G +A T + +A T +G C +C G +T A +C G +T A +Cm30h62 G +C G +A T +G C +A T +G C +T A +C G +G C +Nn93 N +A T +Cm40h52 G +G C +Cm50h42 G +Cm60h32 G +A T +T A +Y R +C G +G C +C G +G C +C G +G C +C G +Cm70h22 G +A T +C G +Cm79h12 G +A T diff --git a/test/SAMtags/MM-orient.sam b/test/SAMtags/MM-orient.sam new file mode 100644 index 000000000..363e7c2be --- /dev/null +++ b/test/SAMtags/MM-orient.sam @@ -0,0 +1,6 @@ +@CO Testing mods on top and bottom strand, but also in +@CO original vs reverse-complemented orientation +top-fwd 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:C+m,1,3,0; Ml:B:C,128,153,179 +top-rev 16 * 0 0 * * 0 0 ATATGGCATATCCCCCGCCGATCCGCTAGAGATCCT * Mm:Z:C+m,1,3,0; Ml:B:C,128,153,179 +bot-fwd 0 * 0 0 * * 0 0 AGGATCTCTAGCGGATCGGCGGGGGATATGCCATAT * Mm:Z:G-m,0,0,4,3; Ml:B:C,115,141,166,192 +bot-rev 16 * 0 0 * * 0 0 ATATGGCATATCCCCCGCCGATCCGCTAGAGATCCT * Mm:Z:G-m,0,0,4,3; Ml:B:C,115,141,166,192 diff --git a/test/SAMtags/MM-orient.txt b/test/SAMtags/MM-orient.txt new file mode 100644 index 000000000..5cb732b48 --- /dev/null +++ b/test/SAMtags/MM-orient.txt @@ -0,0 +1,147 @@ +A T +G C +G C +A T +T A +C G +T A +Cm50 G +T A +A T +G C +C G +G C +G C +A T +T A +C G +G C +G C +C G +G C +G C +G C +G C +G C +A T +T A +A T +T A +G C +Cm59 G +Cm70 G +A T +T A +A T +T A + +A T +G C +G C +A T +T A +C G +T A +Cm50 G +T A +A T +G C +C G +G C +G C +A T +T A +C G +G C +G C +C G +G C +G C +G C +G C +G C +A T +T A +A T +T A +G C +Cm59 G +Cm70 G +A T +T A +A T +T A + +A T +G Cm45 +G Cm55 +A T +T A +C G +T A +C G +T A +A T +G C +C G +G C +G C +A T +T A +C G +G C +G Cm65 +C G +G C +G C +G C +G Cm75 +G C +A T +T A +A T +T A +G C +C G +C G +A T +T A +A T +T A + +A T +G Cm45 +G Cm55 +A T +T A +C G +T A +C G +T A +A T +G C +C G +G C +G C +A T +T A +C G +G C +G Cm65 +C G +G C +G C +G C +G Cm75 +G C +A T +T A +A T +T A +G C +C G +C G +A T +T A +A T +T A diff --git a/test/SAMtags/README.md b/test/SAMtags/README.md new file mode 100644 index 000000000..d268a6cbd --- /dev/null +++ b/test/SAMtags/README.md @@ -0,0 +1,35 @@ +Mm and Ml auxiliary tags +======================== + +The purpose of these test files is to test parsing of the Mm and Ml +tags. These succint Mm and Ml tags are present in the .sam files, +with a more human readable expanded form in the .txt files. +Developers should check whether their implementation is able to +convert between the two forms. + +The .sam files are SAM format, but the only fields used for this +test are the reverse-complementation flag (FLAG bit 0x10), the +sequence, and the Mm and Ml tags. + +The .txt files uses one line per base, with a blank line separating +sequences. Each line consists of two tab-separated fields +representing the top (original as-sequenced orientation) and bottom +strand. + +Each field in the .txt files is a concatenation of the canonical base +and any modifications with their associated probability, expressed +here as a rounded-down percentage. The modification and percentage +are joined together with no punctuation, but for ChEBI numeric codes +the code is bracketted to distinguish it from the numeric percentage. +For example "Cm80(76792)73" is canonical base C, modification m (80%) +and modification ChEBI 76792 (73%). + + +The parse_mm.pl perl script can convert .sam to .txt files, but is for +demonstrative purposes only. Example usage to check files: + + for i in `echo *.sam|sed 's/\.sam//g'` + do + ./parse_mm.pl $i.sam > _ + cmp _ $i.txt + done diff --git a/test/SAMtags/parse_mm.pl b/test/SAMtags/parse_mm.pl new file mode 100755 index 000000000..b8cd21928 --- /dev/null +++ b/test/SAMtags/parse_mm.pl @@ -0,0 +1,111 @@ +#!/usr/bin/perl -w + +# Copyright (c) 2020 Genome Research Ltd. +# Author: James Bonfield +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +# Institute nor the names of its contributors may be used to endorse or promote +# products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +use strict; + +# Complement a sequence +sub comp { + my ($seq)=@_; + $seq =~ tr/ACGTUMRWSYKVHDBN/TGCAAKYWSRMBDHVN/; + return $seq; +} + +# Reverse complement a sequence +sub rc { + my ($seq)=@_; + $seq = reverse($seq); + return comp($seq); +} + +my $nseq = 0; +while (<>) { + chomp($_); # trim newline + next if /^\@/; + + my ($qname,$flag,$rname,$pos,$mapq,$cigar,$rnext, + $pnext,$tlen,$seq,$qual,@aux) = split("\t",$_); + my $dir = $flag & 0x10 ? "-" : "+"; + my ($mod_str)="@aux"=~m/M[mM]:Z:(\S+)/; + my ($mod_prob)="@aux"=~m/M[lL]:B:C,(\S+)/; + + # All counting is from 5' end irrespective of BAM FLAG 0x10 + $seq = rc($seq) if ($dir eq "-"); + + my @seq = split("", $seq); # array of seq bases + my @seqt = @seq; # orientation as shown in SAM + my @seqb = split("", comp($seq)); # plus a complemented copy + + $mod_str =~ s/;$//; # trim last ; to aid split() + my @mods = split(";", $mod_str); + my @probs = split(",", $mod_prob); + my $pnum = 0; + + print "\n" if $nseq++ > 0; + foreach (@mods) { + my ($base, $strand, $types, $pos) = $_ =~ m/([A-Z])([-+])([^,]+),(.*)/; + + my $i = 0; # I^{th} bosition in sequence + foreach my $delta (split(",", $pos)) { + # Skip $delta occurences of $base + do { + $delta-- if ($base eq "N" || $base eq $seq[$i]); + $i++; + } while ($delta >= 0); + $i--; + + # TypePercent combo + my $type_perc = ""; + foreach ($types =~ m/(\d+|.)/g) { + if (/\d/) { + # Avoid ChEBI numbers running into quality values. + $type_perc .= "($_)" . int(($probs[$pnum++]+0.5)/256.0*100); + } else { + # Integer qualities represent 1/256th of the total + # probability space, with each bin being equal size. + # We don't know where in that range our actual + # probability was, so we use the mid-point (+0.5 below). + $type_perc .= "$_" . int(($probs[$pnum++]+0.5)/256.0*100); + } + } + + # Add to top or bottom seq + if ($strand eq "+") { + $seqt[$i] .= $type_perc; + } else { + $seqb[$i] .= $type_perc; + } + $i++; + } + } + + for (my $i=0; $i<=$#seq; $i++) { + print "$seqt[$i]\t$seqb[$i]\n"; + } +}