# Example usage

To use `labtools` in a project:

## Sequence Design Tools

### Imports

In [1]:
from labtools import shuffle

### Shuffle a sequence

Create shuffles of the alphabet:

In [2]:
shuffles_list, names_list = shuffle.windowed_shuffle("ABCDEFGHIJKLMNOPQRSTUVWYXZ")
shuffles_list

['ceadbFGHIJKLMNOPQRSTUVWYXZ',
 'AedbfcGHIJKLMNOPQRSTUVWYXZ',
 'ABgdcfeHIJKLMNOPQRSTUVWYXZ',
 'ABCeghdfIJKLMNOPQRSTUVWYXZ',
 'ABCDiegfhJKLMNOPQRSTUVWYXZ',
 'ABCDEhifgjKLMNOPQRSTUVWYXZ',
 'ABCDEFkigjhLMNOPQRSTUVWYXZ',
 'ABCDEFGkjilhMNOPQRSTUVWYXZ',
 'ABCDEFGHjkilmNOPQRSTUVWYXZ',
 'ABCDEFGHInmljkOPQRSTUVWYXZ',
 'ABCDEFGHIJmnolkPQRSTUVWYXZ',
 'ABCDEFGHIJKnlpomQRSTUVWYXZ',
 'ABCDEFGHIJKLnqpmoRSTUVWYXZ',
 'ABCDEFGHIJKLMnropqSTUVWYXZ',
 'ABCDEFGHIJKLMNsrqopTUVWYXZ',
 'ABCDEFGHIJKLMNOtprqsUVWYXZ',
 'ABCDEFGHIJKLMNOPstrquVWYXZ',
 'ABCDEFGHIJKLMNOPQvtsruWYXZ',
 'ABCDEFGHIJKLMNOPQRwsutvYXZ',
 'ABCDEFGHIJKLMNOPQRSuywvtXZ',
 'ABCDEFGHIJKLMNOPQRSTyxwuvZ',
 'ABCDEFGHIJKLMNOPQRSTUzvxwy']

## Sequencing Analysis Tools

### Sort Processing Example
#### What you need
* fastq file for each sample in the sort
* bin counts (cells per bin) and bin values (mean or median fluorescence of the bin)
* **if you want perfect matches**: a csv with one column headered as "ArrayDNA" which contains your 120 bp AD sequences as DNA
* the sequence directly preceding your AD sequence in your reads (anchor sequence)
* if you have barcodes, you need the sequence directly preceding them and anteceding them (anchor sequences)

#### What you get out
* a dataframe with your AD tiles as indices, normalized scores for tile at each bin, and the tile Activity value

**NOTE: the read cutoff is 10 reads total summed across all bins** This means that if a tile is not found at least 10 times combined across any of the fastq files, it will not be analysed. 

### Imports

In [3]:
from labtools.adtools import sort

### Initialize a sort

In [4]:
my_sort = sort.Sort(["../exampledata/bin1.fastq", "../exampledata/bin2.fastq", "../exampledata/bin3.fastq", 
                "../exampledata/bin4.fastq"], bin_counts = [100000,100000,100000,100000], 
               bin_values = [61,141,251,1462], design_file = "../exampledata/unique_seqs.csv")

### Get activity values

In [5]:
activities, total_reads, reads_per_bin = my_sort.process()

In [6]:
# activity + normalized abundance in each bin
activities

Unnamed: 0,0,1,2,3,Activity
GAAGATCCAACTTGGTTTGATTCTGGTTCTCAATTTATCTTAAATTCTCAACAATTGGTTGAAGCTTTGTCTTTGTGTGATGATGCTGCTGGTTCTCAAGATAGAGAAGAGAATACTAAT,0.279148,0.293312,0.427540,0.000000,165.697515
GAAGCTTTGTCTTTGTGTGATGATTTGTTGGGTGATCAAGATAGAGAAGAGAATGATAATGATGGTGATTTGAAAGATAAACAACCATGTATTGCTGATTATGCTCATTTGGGTCCAGAA,0.110487,0.540278,0.325749,0.023487,199.019328
GATTTGGCTGAAGATGATGAAGTTATGTGTATGGAAGATGAAGTTCAATCTATTCAACCAAATCATGAAAGACCAGATGATGGTCCAGAATTGGAACATGGTTTGGAGAATGGTGCTAAA,0.376741,0.492582,0.130676,0.000000,125.235085
TCTGAATTGACTTCTACTTTGGGTATTTCTCATAGATTGCCACAATCTTTGACTCCATGTGTTAAGACTGGTTCTTTGCAATCTGGTGGTTTGGTTCAATCTGTTCCATTTGAAGAATTG,0.362058,0.279914,0.358029,0.000000,151.418558
AATCATGAAAGACCATCTTCTGGTCCAGAAGCTGAACATGGTTTGGAGAATGGTGCTAAAGAAATGGCTGATGATAAAGAAGAACAAGAGAAAGATAGAGATAATGAGAATCAAGGTGAA,0.611774,0.247237,0.093699,0.047290,164.835151
...,...,...,...,...,...
GACCCAACTGAATGGTTTGATTCTGGTGCTCAATTTATCTTGAATGCTCAACAATTGGTTGAAGCTCAATGTTTGGATGATAATTTGACTAGAGAATTGGAATCTAATGATGGTGCTTTG,0.000000,0.000000,0.000000,1.000000,1462.000000
TCTACTGATTCTACTCCAATGTTTGATTATGATAATTTGGAAGATAATTCTAAAGATTGGACTTCTTTGTTTGATAATGATATTCCAGTTACTACTGATGATGTTTCTTTGGCTGATAAA,0.000000,0.000000,0.000000,1.000000,1462.000000
TCTACTGGTCAAGTCTTGTTTGATATTGATGACTTTAGATGGTTGTTGGATCCAGATGATGAACAATTGGGTAAAGAAGCTATCTTGTCTGATCAATTTGGTAAACCAACTCCAGAGAAT,0.000000,0.000000,0.000000,1.000000,1462.000000
GAAGATCCAACTTCTGATTCTGCTATTCAACAATTGTGGAATCAAGGATTCTTGTTTGTTGAATCTTTGTCTTTGTGTGATGATTTGTTGGGTTCTCAAGATAGAGAAGAGAATACTAAT,0.000000,0.000000,0.000000,1.000000,1462.000000


In [7]:
# the total reads for each sequence
total_reads

GAAGATCCAACTTGGTTTGATTCTGGTTCTCAATTTATCTTAAATTCTCAACAATTGGTTGAAGCTTTGTCTTTGTGTGATGATGCTGCTGGTTCTCAAGATAGAGAAGAGAATACTAAT    44.0
GAAGCTTTGTCTTTGTGTGATGATTTGTTGGGTGATCAAGATAGAGAAGAGAATGATAATGATGGTGATTTGAAAGATAAACAACCATGTATTGCTGATTATGCTCATTTGGGTCCAGAA    21.0
GATTTGGCTGAAGATGATGAAGTTATGTGTATGGAAGATGAAGTTCAATCTATTCAACCAAATCATGAAAGACCAGATGATGGTCCAGAATTGGAACATGGTTTGGAGAATGGTGCTAAA    49.0
TCTGAATTGACTTCTACTTTGGGTATTTCTCATAGATTGCCACAATCTTTGACTCCATGTGTTAAGACTGGTTCTTTGCAATCTGGTGGTTTGGTTCAATCTGTTCCATTTGAAGAATTG    70.0
AATCATGAAAGACCATCTTCTGGTCCAGAAGCTGAACATGGTTTGGAGAATGGTGCTAAAGAAATGGCTGATGATAAAGAAGAACAAGAGAAAGATAGAGATAATGAGAATCAAGGTGAA    20.0
                                                                                                                            ... 
GACCCAACTGAATGGTTTGATTCTGGTGCTCAATTTATCTTGAATGCTCAACAATTGGTTGAAGCTCAATGTTTGGATGATAATTTGACTAGAGAATTGGAATCTAATGATGGTGCTTTG    23.0
TCTACTGATTCTACTCCAATGTTTGATTATGATAATTTGGAAGATAATTCTAAAGATTGGACTTCTTTGTTTGATAATGATATTCCAGTTACTACTG

In [8]:
# the reads of each tile per bin
reads_per_bin

Unnamed: 0,0,1,2,3
GAAGATCCAACTTGGTTTGATTCTGGTTCTCAATTTATCTTAAATTCTCAACAATTGGTTGAAGCTTTGTCTTTGTGTGATGATGCTGCTGGTTCTCAAGATAGAGAAGAGAATACTAAT,11.0,13.0,20.0,0.0
GAAGCTTTGTCTTTGTGTGATGATTTGTTGGGTGATCAAGATAGAGAAGAGAATGATAATGATGGTGATTTGAAAGATAAACAACCATGTATTGCTGATTATGCTCATTTGGGTCCAGAA,2.0,11.0,7.0,1.0
GATTTGGCTGAAGATGATGAAGTTATGTGTATGGAAGATGAAGTTCAATCTATTCAACCAAATCATGAAAGACCAGATGATGGTCCAGAATTGGAACATGGTTTGGAGAATGGTGCTAAA,17.0,25.0,7.0,0.0
TCTGAATTGACTTCTACTTTGGGTATTTCTCATAGATTGCCACAATCTTTGACTCCATGTGTTAAGACTGGTTCTTTGCAATCTGGTGGTTTGGTTCAATCTGTTCCATTTGAAGAATTG,23.0,20.0,27.0,0.0
AATCATGAAAGACCATCTTCTGGTCCAGAAGCTGAACATGGTTTGGAGAATGGTGCTAAAGAAATGGCTGATGATAAAGAAGAACAAGAGAAAGATAGAGATAATGAGAATCAAGGTGAA,11.0,5.0,2.0,2.0
...,...,...,...,...
GACCCAACTGAATGGTTTGATTCTGGTGCTCAATTTATCTTGAATGCTCAACAATTGGTTGAAGCTCAATGTTTGGATGATAATTTGACTAGAGAATTGGAATCTAATGATGGTGCTTTG,0.0,0.0,0.0,23.0
TCTACTGATTCTACTCCAATGTTTGATTATGATAATTTGGAAGATAATTCTAAAGATTGGACTTCTTTGTTTGATAATGATATTCCAGTTACTACTGATGATGTTTCTTTGGCTGATAAA,0.0,0.0,0.0,24.0
TCTACTGGTCAAGTCTTGTTTGATATTGATGACTTTAGATGGTTGTTGGATCCAGATGATGAACAATTGGGTAAAGAAGCTATCTTGTCTGATCAATTTGGTAAACCAACTCCAGAGAAT,0.0,0.0,0.0,12.0
GAAGATCCAACTTCTGATTCTGCTATTCAACAATTGTGGAATCAAGGATTCTTGTTTGTTGAATCTTTGTCTTTGTGTGATGATTTGTTGGGTTCTCAAGATAGAGAAGAGAATACTAAT,0.0,0.0,0.0,11.0


#### Locate any tiles, not just perfect matches
Simply do not include the design file. A short (less than 120 bp) tile will probably appear with a significant number of reads. This is **probably** your untransformed background (cells transformed with a plasmid that did not get a tile). Most of the non-perfect matching tiles will be sequencing errors. You might find a way to map these back to their true tile. A few of the non-perfect matching tiles (likely those with a significant number of reads) will be sequencing library PCR amplification errors. An even fewer number of the non-perfect matching reads could be mutants that arose within the actual cell, or free tiles. Someone may put in the effort to distinguish these categories in the future.

In [9]:
my_sort = sort.Sort(["../exampledata/bin1.fastq", "../exampledata/bin2.fastq", "../exampledata/bin3.fastq", 
                "../exampledata/bin4.fastq"], bin_counts = [100000,100000,100000,100000], 
               bin_values = [61,141,251,1462])
activities_no_design, numreads_total, _ = my_sort.process()

In [10]:
activities_no_design

Unnamed: 0,0,1,2,3,Activity
GAAGATCCAACTTGGTTTGATTCTGGTTCTCAATTTATCTTAAATTCTCAACAATTGGTTGAAGCTTTGTCTTTGTGTGATGATGCTGCTGGTTCTCAAGATAGAGAAGAGAATACTAAT,0.269721,0.290977,0.439302,0.000000,167.745576
GAAGCTTTGTCTTTGTGTGATGATTTGTTGGGTGATCAAGATAGAGAAGAGAATGATAATGATGGTGATTTGAAAGATAAACAACCATGTATTGCTGATTATGCTCATTTGGGTCCAGAA,0.106476,0.534574,0.333835,0.025115,202.380348
GATTTGGCTGAAGATGATGAAGTTATGTGTATGGAAGATGAAGTTCAATCTATTCAACCAAATCATGAAAGACCAGATGATGGTCCAGAATTGGAACATGGTTTGGAGAATGGTGCTAAA,0.368831,0.495122,0.136047,0.000000,126.458644
GGTTAATTAAGGCGCGCCACTTCTAAATAAGCGA,0.224803,0.533542,0.241655,0.000000,149.597758
TCTGAATTGACTTCTACTTTGGGTATTTCTCATAGATTGCCACAATCTTTGACTCCATGTGTTAAGACTGGTTCTTTGCAATCTGGTGGTTTGGTTCAATCTGTTCCATTTGAAGAATTG,0.351449,0.278970,0.369581,0.000000,153.537999
...,...,...,...,...,...
TCTACTGATTCTACTCCAATGTTTGATTATGATAATTTGGAAGATAATTCTAAAGATTGGACTTCTTTGTTTGATAATGATATTCCAGTTACTACTGATGATGTTTCTTTGGCTGATAAA,0.000000,0.000000,0.000000,1.000000,1462.000000
TCTACTGGTCAAGTCTTGTTTGATATTGATGACTTTAGATGGTTGTTGGATCCAGATGATGAACAATTGGGTAAAGAAGCTATCTTGTCTGATCAATTTGGTAAACCAACTCCAGAGAAT,0.000000,0.000000,0.000000,1.000000,1462.000000
AATACTCCAACTCCACCATCTTTGGTTGATGGTGTTGCTGGTGATGAAGAAGCATTTGATGAGATGTTTGATCCATTCTTTGAAGAATTGGATTCTATTCCAGAAGCTGCTTTGTGATAA,0.000000,0.000000,0.000000,1.000000,1462.000000
GAAGATCCAACTTCTGATTCTGCTATTCAACAATTGTGGAATCAAGGATTCTTGTTTGTTGAATCTTTGTCTTTGTGTGATGATTTGTTGGGTTCTCAAGATAGAGAAGAGAATACTAAT,0.000000,0.000000,0.000000,1.000000,1462.000000


In [11]:
numreads_total

GAAGATCCAACTTGGTTTGATTCTGGTTCTCAATTTATCTTAAATTCTCAACAATTGGTTGAAGCTTTGTCTTTGTGTGATGATGCTGCTGGTTCTCAAGATAGAGAAGAGAATACTAAT    44.0
GAAGCTTTGTCTTTGTGTGATGATTTGTTGGGTGATCAAGATAGAGAAGAGAATGATAATGATGGTGATTTGAAAGATAAACAACCATGTATTGCTGATTATGCTCATTTGGGTCCAGAA    21.0
GATTTGGCTGAAGATGATGAAGTTATGTGTATGGAAGATGAAGTTCAATCTATTCAACCAAATCATGAAAGACCAGATGATGGTCCAGAATTGGAACATGGTTTGGAGAATGGTGCTAAA    49.0
GGTTAATTAAGGCGCGCCACTTCTAAATAAGCGA                                                                                          24.0
TCTGAATTGACTTCTACTTTGGGTATTTCTCATAGATTGCCACAATCTTTGACTCCATGTGTTAAGACTGGTTCTTTGCAATCTGGTGGTTTGGTTCAATCTGTTCCATTTGAAGAATTG    70.0
                                                                                                                            ... 
TCTACTGATTCTACTCCAATGTTTGATTATGATAATTTGGAAGATAATTCTAAAGATTGGACTTCTTTGTTTGATAATGATATTCCAGTTACTACTGATGATGTTTCTTTGGCTGATAAA    24.0
TCTACTGGTCAAGTCTTGTTTGATATTGATGACTTTAGATGGTTGTTGGATCCAGATGATGAACAATTGGGTAAAGAAGCTATCTTGTCTGATCAAT

### Get data for reads that include barcodes AND tiles
Support for barcoded only data may or may not be added in the future.

In [12]:
activities_barcoded, total_reads, reads_per_bin = my_sort.process(barcoded = True)

Note that you get back less tiles using this method. The reason for this is because the read must have a locatable barcode AND tile, which is less likely than having one or the other. The primary reason for doing this analysis is assessing per-transformant variation. The assumption is that unique tile-barcode pairs come from unique original transformants.

In [13]:
activities_barcoded

Unnamed: 0,Unnamed: 1,0,1,2,3,Activity
GAAGATCCAACTTGGTTTGATTCTGGTTCTCAATTTATCTTAAATTCTCAACAATTGGTTGAAGCTTTGTCTTTGTGTGATGATGCTGCTGGTTCTCAAGATAGAGAAGAGAATACTAAT,ATCGTTACGTC,0.289081,0.312643,0.398276,0.000000,161.683873
GATTTGGCTGAAGATGATGAAGTTATGTGTATGGAAGATGAAGTTCAATCTATTCAACCAAATCATGAAAGACCAGATGATGGTCCAGAATTGGAACATGGTTTGGAGAATGGTGCTAAA,GGAAGGTAGTA,0.234149,0.557113,0.208738,0.000000,145.229204
TCTGAATTGACTTCTACTTTGGGTATTTCTCATAGATTGCCACAATCTTTGACTCCATGTGTTAAGACTGGTTCTTTGCAATCTGGTGGTTTGGTTCAATCTGTTCCATTTGAAGAATTG,GAGACCAAATC,0.331376,0.288808,0.379816,0.000000,156.269710
GATGCTAAGAGAGATTTGGAAGAATGTCAGAAGATTGTTGCTGATCCATCTAATATTGAATTGGATGCTCCACCAGAAGCTAGATTGGCTCAAGCTGAATTTGGTTCTCAAGATTCTGCT,CAGGAAAATCG,0.305253,0.558685,0.136062,0.000000,131.546634
CAAGACTTTATTTGTTCTGTTACTCAAGATGTTGGTATTAATTATACTCATCCACAGAATTTGCCAGGTGTTTCTAAAGATGGTACTTCTGTCTTCTTCTTTAATAAGACTGCTCATGCT,ACATGAACTTA,0.244871,0.416160,0.280666,0.058303,229.301369
...,...,...,...,...,...,...
AATACTCCAACTCCACCATCTTTGGTTGATGGTGTTGCTGGTGATGAAGAAGCATTTGATGATTTGAAGATGTTTGATCCATTCTTTGAAGAATTGGATTCTATTCCAGAAGCTGCTTTG,CTGTCTTCATA,0.000000,0.000000,0.000000,1.000000,1462.000000
AAAGATAGAGATAATGAGAATCAAGGTGAAGAAGATCCAACTTGGTTTATTGATGCTTTGCAATTTAATGGTTTGGTTCAAGCTGAATCTGCTCAATTGTCTTTGTGTGATGATTTGTTG,GTGGCAGTATG,0.000000,0.000000,0.000000,1.000000,1462.000000
TATCCACCAAGAGATGATTATCATGAACAACCATATCCAGTTATGGATACTGAAGGTAATGTTATTGGTTTGACTGATTTGGAATGTGATGAAGAATTGTTGCCAATTTCTTGGGATTGG,CACCCACTCCG,0.000000,0.000000,0.000000,1.000000,1462.000000
TCTACTGATTCTACTCCAATGTTTGATTATGATAATTTGGAAGATAATTCTAAAGATTGGACTTCTTTGTTTGATAATGATATTCCAGTTACTACTGATGATGTTTCTTTGGCTGATAAA,CTCAGTAATGG,0.000000,0.000000,0.000000,1.000000,1462.000000


In [14]:
def format_index(s):
    return "font-size: 6pt;"
activities_no_design.style.applymap_index(format_index)
activities_no_design.style.apply({"font-size":"6pt"})

KeyError: "Column(s) ['font-size'] do not exist"

<pandas.io.formats.style.Styler at 0x7ff314bdbdf0>

### Support for custom anchor sequences
If your tiles or barcodes have a custom anchor sequence (AKA the non-variable portion of the read that is used to locate the variable portion of the read), you can specify that in the kwargs of your Sort(). This passes the arguments to the pull_A() function used on each read to locate the sequence of interest (AKA AD or tile). 

#### Use the sequence directly preceding the AD/tile as an anchor sequence. Additional characters between AD/tile and preceding anchor seqeunce will not work

My preceding anchor sequence is in blue while my AD/tile sequence is in green in this example read.

The anchor sequence preceding the barcode is purple while the anchor sequence anteceding is red. Barcode length is 11 by default. (Not necessary if you don't have barcodes)

##### Example read

<span style="font-size:10px">TCCCTGCGGGCTCTACTTCATCG<span style="color:blue;font-weight:800">GCTAGC</span><span style="color:green;font-weight:500;font-size:10px">**GGTTCTTCTAAATTGAGATGTGATAATAATGCTGCTGCTCATGTTAAATTGGATTCATTTCCAGCTGGTGTTAGATTTGATACATCTGATGAAGAATTGTTGGAACATTTGGCTGCTAAA**</span>TGATAAATAGATGA<span style="color:purple;font-weight:800">GGGCCCG</span>TCAACATAGAA<span style="color:red;font-weight:800">GGAGAGAA</span>ACATCTAAAAAAGCGATA</span>

#### Specify alternate values in a dictionary and pass that dictionary to Sort.process()

In [15]:
# these are the default values which would work for my example read above
# no input is required if they work for you
kwargs = {"ad_preceder":"GCTAGC", "bc_preceder":"GGGCCCG", "bc_anteceder":"GGAGAGAA", "bclength":11, "ad_length":120}

In [16]:
activities_barcoded, _, _ = my_sort.process(barcoded = True, **kwargs)

In [17]:
import pandas as pd

In [18]:
pd.set_option('display.max_colwidth', 1)

In [19]:
#activities_barcoded.style.set_properties(**{'background-color': 'grey','font-size': '6pt'})

#### Example output for pull_AD
This might be useful for someone who wants to use pull_AD to analyze reads for a purpose outside of calculating activities. 

In [20]:
from labtools.adtools.finder import pull_AD

In [21]:
read = "TCCCTGCGGGCTCTACTTCATCGGCTAGCGGTTCTTCTAAATTGAGATGTGATAATAATGCTGCTGCTCATGTTAAATTGGATTCATTTCCAGCTGGTGTTAGATTTGATACATCTGATGAAGAATTGTTGGAACATTTGGCTGCTAAATGATAAATAGATGAGGGCCCGTCAACATAGAAGGAGAGAAACATCTAAAAAAGCGATA"
pull_AD(read, kwargs)

('GGTTCTTCTAAATTGAGATGTGATAATAATGCTGCTGCTCATGTTAAATTGGATTCATTTCCAGCTGGTGTTAGATTTGATACATCTGATGAAGAATTGTTGGAACATTTGGCTGCTAAA',
 'TCAACATAGAA')

### Counting tiles in a fastq file
This essentially applies the pull_AD function shown above to every read in the fastq file. You can also pass the kwargs dict to it to specify custom anchor sequences. 

In [22]:
from labtools.adtools.pair_counter import seq_counter
seq_counter("../exampledata/mini.fastq")

GGTTCTTCTAAATTGAGATGTGATAATAATGCTGCTGCTCATGTTAAATTGGATTCATTTCCAGCTGGTGTTAGATTTGATACATCTGATGAAGAATTGTTGGAACATTTGGCTGCTAAA    1
GAAGAATTGTTTTTACATTTGTCTGCTAAGATTGGTAGATCTTCTAGGAAACCACATCCATTCTTGGATGAATTTATTCATACTTTGGTTGAAGAAGATGGTATTTGTAGAACTCATCCA    3
dtype: int64

#### Use barcoded = True  to count tile and barcode pairs
#### Use the design_file flag to look for specific tiles

In [23]:
seq_counter("../exampledata/bin1.fastq", barcoded = True, design_to_use="../exampledata/unique_seqs.csv")

GAAGATCCAACTTGGTTTGATTCTGGTTCTCAATTTATCTTAAATTCTCAACAATTGGTTGAAGCTTTGTCTTTGTGTGATGATGCTGCTGGTTCTCAAGATAGAGAAGAGAATACTAAT  ATCGTTACGTC    11.0
GAAGCTTTGTCTTTGTGTGATGATTTGTTGGGTGATCAAGATAGAGAAGAGAATGATAATGATGGTGATTTGAAAGATAAACAACCATGTATTGCTGATTATGCTCATTTGGGTCCAGAA  ACCATTTAGCG    1.0 
GATTTGGCTGAAGATGATGAAGTTATGTGTATGGAAGATGAAGTTCAATCTATTCAACCAAATCATGAAAGACCAGATGATGGTCCAGAATTGGAACATGGTTTGGAGAATGGTGCTAAA  GGAAGGTAGTA    5.0 
GGTCAAAGGAAGAGAAGGAAGATTACTCCAACTTTGGTTAATGATGAACCAGTTAGATGGCATAAGACTGGTAGAACTAAACCAGTTATGTTGTCTGGTGTTCAAAGAGGTTGTAAGAAA  TAGCTCAAACC    4.0 
TCTGAATTGACTTCTACTTTGGGTATTTCTCATAGATTGCCACAATCTTTGACTCCATGTGTTAAGACTGGTTCTTTGCAATCTGGTGGTTTGGTTCAATCTGTTCCATTTGAAGAATTG  GAGACCAAATC    21.0
                                                                                                                                         ... 
CAAGCTATTGCTAATGATCAACAAGCTGTTGAAGCTGCTTCTGCTTGTGATGATGCTGCTGGTTCTCAAGATAGAGAAGAGAATACTAATTCTGGTTCTGCTAAAGATAAACAACCATGT  TCGTCTGACAC    1.0 
AAAGAT