In [33]:
import pandas as pd
import numpy as np
import os
from uuid import uuid4
from itertools import product
import concurrent.futures
from datetime import datetime
import re
from collections import namedtuple, Counter

from tgsts.sequtils.trimming import trim_intersection
from utils import build_full_length_alignments, add_pipes
from tgsts.sequtils import str_to_fasta_seqrecord
from tgsts.sequtils.kmers import calculate_kmer_distance
from tgsts.sequtils.rra import identify_rrs_ali, mask_rrs
from tgsts.utils.parallel import run_concurrently

In [2]:
path = './erap_tile_test_new.xlsx'

df = pd.read_excel(path).replace({np.nan: None})
df.head()


Unnamed: 0,Samples,Library ID,Method,Fragment,NumReads,Fragment Name,cds_mismatch_list,gDNA_mismatch_list,analysis_code,Sequence
0,AMAI,ERAP1_BCAv2_11,pbAA,1,103,1a,No_MM,"5utr:-296delACACACACACACAC>, intron1:2595delT>...",1,GAAAGTCCTGGGGGCCACCTCTAACCCACCTTCCTCCTCTACAGCA...
1,AMAI,ERAP1_BCAv2_11,pbAA,1,122,1a,No_MM,"5utr:-331delA>, 5utr:-296delACACACACACACACACAC...",1,GAAAGTCCTGGGGGCCACCTCTAACCCACCTTCCTCCTCTACAGCA...
2,AMAI,ERAP1_BCA_22,laa,2,179,2a,No_MM,"intron5:13741delA, intron11:20063C>A, intron13...",1,TTTGTGTTCCTCTAGTGCTGAGATTTCTTATTCTTGTGGAGGTATC...
3,AMAI,ERAP1_BCA_22,laa,2,213,2a,No_MM,"intron5:13741delA, intron13:22270insT, intron1...",1,TTTGTGTTCCTCTAGTGCTGAGATTTCTTATTCTTGTGGAGGTATC...
4,AMAI,ERAP1_BCA_28,laa,3,171,3f,exon15:2285C>G,"intron11:20063C>A, intron13:22270insT, intron1...",1,AATGAAACTATAGATAACAATTATTTCTATTATCTTTTCAGGGAGC...


In [3]:
df[df['Samples']=='AMALA']

Unnamed: 0,Samples,Library ID,Method,Fragment,NumReads,Fragment Name,cds_mismatch_list,gDNA_mismatch_list,analysis_code,Sequence
10,AMALA,ERAP1_BCA_12,laa,1,393,1a,No_MM,"5utr:-357delA, 5utr:-323insACAC, intron1:1293d...",2,GAAAGTCCTGGGGGCCACCTCTAACCCACCTTCCTCCTCTACAGCA...
11,AMALA,ERAP1_BCA_22,pbAA,2,77,2a,No_MM,intron11:20096delTG,2,TTTGTGTTCCTCTAGTGCTGAGATTTCTTATTCTTGTGGAGGTATC...
12,AMALA,ERAP1_BCA_28,pbAA,3,715,3a,No_MM,"intron11:20096delTG, intron14:24183insA, intro...",2,AATGAAACTATAGATAACAATTATTTCTATTATCTTTTCAGGGAGC...
13,AMALA,ERAP1_BCA_25_reload,pbAA,4,103,4a,No_MM,"intron14:24484insA, intron18:27923G>A, intron1...",2,GCTTGGCAAAATGTCCTGAAGTCTTGTTGCATAATTTGCTCTCAAA...
14,AMALA,ERAP1_BCA_26,pbAA,5,68,5a,No_MM,"intron19:33709A>C, intron19:35500delT, intron1...",2,TCAAGTCAGTTAATACCCTAAGAATTAGATTTTATTTCTTATTCTG...


In [39]:
#Fill rows
rows = []

prev_filled_row = None
for i, row in enumerate(df.to_dict(orient="records")):
    
    if row['Samples'] is not None:
        prev_filled_row = row
        
    for column, value in row.items():
        if 'Typing' not in column and value is None:
            row[column] = prev_filled_row[column]

        
    rows.append(row)
    

    

In [40]:
#Filter rows

#Get nonzero analysis codes and remove missing seqs
rows = [
    row 
    for row
    in rows
    if row['analysis_code'] != 0
    and row['Sequence'] is not None
]

In [41]:
row.keys()

dict_keys(['Samples', 'Library ID', 'Method', 'Fragment', 'NumReads', 'Fragment Name', 'cds_mismatch_list', 'gDNA_mismatch_list', 'analysis_code', 'Sequence'])

In [38]:
# Load into sample dict
samples = {}

for row in rows:
    sample = samples.setdefault(row['Samples'], {})

    print(row.keys())
    fes
    fragment = Fragment(
        seq=row['Sequence'].strip(),
        
    )
        
    
    if row['analysis_code'] == 1:
        sample.setdefault(row['Fragment'], []).append(row['Sequence'].strip())
    elif row['analysis_code'] == 2:
        sample.setdefault(row['Fragment'], []).extend([row['Sequence'].strip()]*2)
    elif row['analysis_code'] == None:
        continue
    else:
        continue
        raise Exception('Other code found:', row['Typing Results::analysis_code'])
        
    #print(len(sample.get(row['Typing Results::Fragment'], None)))
    
#Lose weird numbered samples
for sample_id, fragment_info in [*samples.items()]:
    for i in range(1, 6):
    #for fragment, sequences in fragment_info.items():
        if len(fragment_info.get(i, [])) != 2:
            print('Deleting', sample_id)
            print(sample_id.ljust(20), f'fragment {i}', f'{len(fragment_info.get(i, []))} sequences', sep='\t')
            del samples[sample_id]
            break

    
#Lose None sequence
for sample_id, fragment_info in [*samples.items()]:
    for fragment, sequences in fragment_info.items():
        if None in sequences:
            print('Deleting', sample_id)
            print(sample_id.ljust(20), f'fragment {fragment}', f'{len(sequences)} sequences', sep='\t')
            del samples[sample_id]
            break

    

dict_keys(['Samples', 'Library ID', 'Method', 'Fragment', 'NumReads', 'Fragment Name', 'cds_mismatch_list', 'gDNA_mismatch_list', 'analysis_code', 'Sequence'])


NameError: name 'fes' is not defined

In [10]:
erap_reference = 'GTACAGTGGCCCTTGGTAGTGCAGGAAAGTCCTGGGGGCCACCTCTAACCCACCTTCCTCCTCTACAGCATCTCCCACTGTAGTCATTCTCTACCGAAGCCCCAGAAGGTGCGGCACTTTGCCACGACAGAGTACTGGGTTCATGTTTCTTTCCGAGGCGGGCCAAGAGCTCTCAGCCCACTGGCAGTGGCGAGATGACGGACACCCAGCGAGTCCAATGGGCGTCGAACGCGTCTAGGCTTGGTGGACTTGTCAGCGCCTGCCTGGCTTCGGTCCCCAACTTGAGCACCGGCCCTTTCCTGCATGCCCCTAACCCTCGCAACGCTAAACAGTGAAAAAAAAAAAAAGACAAAAACAAAAAGCATCTCAACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACGGATCCGCGTTCAGAAAGGCGTGCACTTCCTACGCCTGATCCCCCGCATCGCAACCTCGCAGCTTCCCCGGCGTGCAGCGCTCATTTACCAATTCCCTTCCTGGGAGTTGCGGCTTCCCTCGCTCGGCCCCACTCCCGTTTACCCTTTCCCCAGCTCCCGCCTTAGCCAGGGGCTTCCCCGCCTGCCGCTAGGGCTCGGGCCGAAGCGCCGCTCAGCGCCAGCCTGCCGCTCCCCGGGCTCCACTTTC|ACTTTCGGTCCTGGGGGAGCTAGGCCGGCGGCAGTGGTGGTGGCGGCGGCGCAAGGGTGAGGGCGGCCCCAGAACCCCAG|GTACAGCGCGCTCGAGCCGCGGGTAGGGGACTGCGGGCCGGGAGGAGAGCGCGGCACCCGCCCCTTCCCTGCGCCCGTCAAGTGGGGGGCTGAGGGCCTGGGGCACGGGAGGAGGGAGACGGGGCACGGGAGGAGGGAGACGGGGCGCGGGAGGAAGGCGACGGGGCGCGGGAGGAAGGCGACGGGGCGCGAGACAGGGCGCGGGAAGGGCGGGGGGAGTCGCTGGCTAGGCCCGAGTCCGCGGGGTGCCCGGCGGGTTGGCGGCGGGCCCACCCCTGCCGGTCCCTGTCCCTGTCCCTCCGGGCGCGTGGCCGGTGCGCCTGCTTCACGGGTCTCCCCGCTGTTCGGCCGGCGGGAGCCTCCCTCAGCGCTCCGCCTGGCGCCTGGATGCCTGCCAGTCCTGCAGGCCACCGACGCCCGCGCGAGGCCAAAAGGCGGGGTGGGGCGGGCAGCTGGCTCGGGCTGAGGAGGGCACCTGCCCATAGCTGCTAGAGAAACCCAGAGGCTTTGGGTTAAAGACTCTGGTGGGGTGGGATGCGCGGGCCGTGTGTGTTCTTAAGGTCACTTCCCTCCCTGCTTCTCCTGTTCTTCTGGTCAGCAATTCTCTCTCTCCCCTTCGCTCTGGCTCTGGCTGGGTTTTATTCAGATAAAGCACCTCTGTTGACGCAAATTAAAAGTTTCCTATCTGGGTGCCTCACTGGCCAGGTGGTCCTACAAAGTTAATTCCATGAGGGGAAGGGGGAGAGCACACACTTCCTCACGCTTTTGGATTTCTTTGTGTAGGCTAGGTTCAGAAAGAAATTATCTGTTTCCTATTAAACACCCAGAGGATTCGCTCTGAACTCAGGACGTGGTCAACAATTAACAAAACAACAAAACAAAACAAAACAAAACAAAAAACTTGAAAATTGGGCACAGTTGTCTCTTGCCTGAGGATTTTTAATTAGTATAAGTAGCACATTTTCAGGTGCGGCCTGAATAGAAACATTCTAGTACTTTTTTTTTTTTCAAATTAATCCAGCATTTTTATTATTTACCAACAGTGCTTGTTAATTTCATTGTTCAGGAAATTCTGGAAGAACCTCAATTACTTCTTGATGATCTATTTCATATACTATAGTGCCCCAATAAAAGGAAGGGAGGCAGAGGTTGCAGTGAGCCAAGATCGCACCACTGCACTCCAGCCTGGGAGACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAAAAAAGAGGGATCTTGGTGAAGCTGAAAAAGCAGGGAGTTTATACTCACACAGATGTGGATTGCATTCCAACAAGTTGTGTGAACTTAGCAAAGTTACATGAATGGTTCTTTGCTTCAGTTACCCCACCTTGAATGAGAATAATAGGCTATTAGAGAGAGTGAGCATGTGTAAAGTGCCTGGCACTTTGGAGGAGCTTAGTAAATATTAGTTCTTTTTCCTCCTTGGCTTCTATTCCTTTAAGTGTTAGTGGAGTGTAACTTCAAAGAGAATTGTACTTCTCTTGGGAAGAGCTGCTTATATTGAGTAGACTACTGTTTTTGAGAGCTTTCTTTTTTTTTAACTGCCTCCCTTTCAGAAAATTGTTAGTAAAACCAGACCCTAGGAGACCAGCCAGAACCATGAAATGCCATGTTTCAAACTGGAACACTTACTATTGGAAGCAAAGCCAAACAAGAGCTAATTTTTCAGGGGAAGAGCCAAACACACGAACATGATCACAGAATCTTGGATGTAGCCTACAGTTTGGGATTAATAAGAGAATTTATTAGTGAAGCCCTTTATTCACTACATGGAGTTTTTACCAAGCCCCACTCATGCACTGCATCCTCGTTGAGACATAACTGTTTCTCTTTGGACCCCTCATGGACCCAACCCTGCAAAGCCTCTGATCCAAGGTCCCGGTACCAACCCCTTCCGCAGCACATCAGCCTTTCTGTCAGCTCATAACGAGTTGGAATTTCTAGATCTTCTCTGGGGCTGTTGGAGAGGTCTCGGGGACTTTCAGAGTCCTTACATGCTTGAACCTGCCACCTTCACAGAGTCCTCTGGATCCCGTCTTGGGGCAGGGGCGATGCTCATTAAGCTGTTGCTGCCAGTAATTCCATATGGAAAGCAAAACACAAGTTCCATTTACTCTCTAGTTCCCCAACTTCAAGGGCAAAAAAATGTTCTCCCTGTTCACACTTCCTGTCTCACCTGGGTGGTGCCTTTTGAACTGGGATTATGAGATTTCCAAGACTCTCTCTAATGTGTAGGTATCCTTTCTGTTTAGCCTCCAGATTGCTCCAGAGGTGAGGAGAAGGGAATTCCCTTGAGCTGTGCATTTGGGAAGGGAGCAAGGAAGTCAGGGGTTAGGGAAGGCACTTCAGCCATTGCCTTGAATTAGTATCCTATCACATAGAGTTGAAGGGGGAAAGCCAGGATTTGGCAAGGATGAGCTTTTCAACCTTGGCTTCTCAGTAAAATCTCTGGACAGTTTTTTTTTTTTTTAAAAAAAAAAACCAAAAACCAGAAATTAAACAATCCGTAAAACCATACCCCGATCTTATCACCATAGGTTCCAGTTTAATTGTTGTCAATAAGGACCCAGGCATCGAAAATTTTAAAAGCTTCCCAGGTTACATTAATATGCAGCCAGAGTTAGGAAACTTGACATCTCAGAGAGGGAGAATTCCATGTACTGTGAACACTTTGAGGGATCCACATTGTAAGCTTGCTACTTTTCCCAACTGGAACACGAGGAGTTTGGGCCAATCACTTGCATTCACCTGGTATGGGCCCCTCTTGGCAAACACCCATTAGAAAGGTGCGTTTGTATAAAAGAAATAAAAACTTATGTTTGATGCTTGGGGCATGGTTTGCCAACTTCCTTAAAATTCACATTGCCTTTTTTTTTTTTTTTTAAAGACAGAGTCTTGCTCTGTTGCCAGGCTGGAGTGCAGTGGTGCAATCTCAGCTCACTGCAACTCTGCCTCCCAGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCAAGTAATTAGGACTACAGGTGCGCACCACCACACCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGATTTCGCCATGTTGGCCAGGATGGTCTCAATCTGTTGACCTTGTGATCTGCCCACCTCATCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCATGCCTGGCCTGAAATTCACATTGCTTTTATGTCTTTAAAATCAATCCAGGTGATTGATACGTTTGCCACAAACTACTGGGAAAATTAAGCTCTTTAAGCTCTTCCTGGCTAAATAAATAGGTAATTAACTTTTGGCATGACAATTTGAGGAAGACACTGATGTTATTAAAGGTTACCATTACATATACTTACAGGGGAAGTGGAGACCTCGTTCTAGTGGTGGCTGCCACTGTGTCAGCATTTGATTTCCAGGGTACTAGGGTGACTTTCTCCAAGGTCCAGTGTCAGTGGGGAGTGGTGCTTGATCAGATGTTCCTCTGATATGGTTCTGGTTTTGCTTCTTTCATGAGCCTGGTTTCCCAAATGTCCTGCAACTCTGTGACCCGTGTAGTGAGCCACTCAGGATCCCCTAATGATTCCTTTTCTGCCTATATCAGCCAGAGCTTGTTCGGTTGCTTTCAACCAAGAACCCTGACAGGTAGAGTTAATTTAAACTTTGAACATCAAATGATGCTTTCTAGTACGTGTTAATGATTGTTGCTAACTGTAAACATCTTTCTTATATGAAACCATAACATAGGGAAGGTCCTTTTACTTTCAGGAAAAGACCTAGTACTTTTGGAAGTTTATGCCTATTTCTGTGAATGCTGGGTGGATACATTCTGAAATTATGCTGTGTCAATAACATTTTAATGACATATATTTTTGCTTTTGTACATTTGTGCCGCTAG|GTAGGTAGAGCAAGAAGATGGTGTTTCTGCCCCTCAAATGGTCCCTTGCAACCATGTCATTTCTACTTTCCTCACTGTTGGCTCTCTTAACTGTGTCCACTCCTTCATGGTGTCAGAGCACTGAAGCATCTCCAAAACGTAGTGATGGGACACCATTTCCTTGGAATAAAATACGACTTCCTGAGTACGTCATCCCAGTTCATTATGATCTCTTGATCCATGCAAACCTTACCACGCTGACCTTCTGGGGAACCACGAAAGTAGAAATCACAGCCAGTCAGCCCACCAGCACCATCATCCTGCATAGTCACCACCTGCAGATATCTAGGGCCACCCTCAGGAAGGGAGCTGGAGAGAGGCTATCGGAAGAACCCCTGCAGGTCCTGGAACACCCCCGTCAGGAGCAAATTGCACTGCTGGCTCCCGAGCCCCTCCTTGTCGGGCTCCCGTACACAGTTGTCATTCACTATGCTGGCAATCTTTCGGAGACTTTCCACGGATTTTACAAAAGCACCTACAGAACCAAGGAAGGGGAACTGAG|GTATTTTTTTTTCTCTTTTTCTTTTAAACTGCAAGTGCTGCCCACGCTAAATTCATTATTTCAGATTGATTGTCTTTTAAAATTCCCTTTGCTGTTGAACTTTTTCTTCAGTTTTGCTTTTGCATCTTCTTTATAGTGTTAAAAATGGCTTTTTCCCTTGCTTTTTAAATCTCATTTTAAAATTCTATTTTAACCAATTTTCTTTCCCCCAGCTCTATCAGAGTAAATATCTATTTGTTTATTTGGTTCGATTTCTGAGACATAATAAACATGTTTAATTTTCCTGAACTGTGTATTAGTTTCCTAGGGCTGTTGTAACAAAGTACCACAGACTGGGTAGCTATAAACAACAAAATGTATTCTCTCTCAGGTCTGGAGGCTAGAAGTCTGAAATCAAGGTGTCAGCAGGCCCGTGCTCCCTCCAGACTCTGGGTAGAATCCTTCCTTATATCTTCCTAGCATCTAGTGGTGGCCGTGGATCCCTGGTACTCCATGCCTAGCACCTGCGTCATTCTAGTTTCTCCCTCTGTTAGTCGCATGGCCATTCTATTTTCCTATGTCCCAGTCTCCATCTTCTTATAAGGAAACCAGACATACCAGATTAGGGCCCAGCCTGGTGGCCTCTTCTTCACTCCATGATACTTGTAAAGACCCTATGTCCAAATAAGGTCACATGCACAGATACTACAGGTTAGGACTTCAGCAAATCCTACTAGCAACATATTAGAGGAAATACTTTTATCTCAGTTAAAACTTTTTTAGAGATCTCTTCTCACCTTGCTTTGGTTCTGTTTTTAAGGAGGAGACTTATTTGGGGGAGATTTTATGCTCAGTTTTAAAATGGAATTTTATTTGTTGGTAGATTATACTAATTTATTTTTCAAATTCCATATTATTTTATCAAGGTAAGAAAGTAAAATTTATTTCACTCATAGCCCCCTGAACTGACCACTACTTCTATTTCACTGGAATATTCTGCCAGACTTCTTTCTGGATATGCATATATATTATTTTATATAAATAGAATTCTTATATTTTCTCTTTTACAAGGAAATTTTTTAACTTAATATGTTGGGAAGATGTTTTATATAAATGACTATTACTAGACATCTTTTTAATGATTTTATTAAAATACATAGTTGTACATTAATATCTTTAATCCCTCGGGAATGGAAATGAAATTGTTTCCAATTTTTCAGTATCAACAACACTTTGATGAGCATTTTTGTAAATTATTTCTTTAGAATAAATGCCTAGAAGTAAAATTGGTAAGCCAAAGAGCCTATAAATTTTTGATAAACTTTGTCATTTTTTCTTACCCCTCACCAATAGTGTATATTGGCAGTCTTTTTAGAGCAAAAAAGGTCCCCAAATAAATGATCTCTTTTTAATTTACGTAACTTTGATTAAGGAAGTTGAGCAGCTTTTGATATGTTTGTTAATGATTTATAGGTTCTTCTTTTATGAATTGCCTGTTAATGACCTTTGACTTTGCCTGAGATTCCCTTGGTTGTTGGGGATTTTTTTTTTTTTTTTTTTTTTTTTTTTGTCGAGACGGAGTCTTGCTCTGTCACCCAGGCTGGAGTGCAGTGGCGCGAACTCGGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGACGCGCACCACCACACCCAGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGGTGGTCTCGAACTCCTGTCCTCGTGATCCGCCCGCCTAGGCCTCTCAAAGTGCCAAGATTACAGGTATGAGCCATTTTTAGGATTACATATTTTTAGGATCTCACTATCTGTTAAGGCTATTAAGTTTTCCTCACATACTTCTTAAATGAATTTTCCCTGTTTTTTACTTTTTCTTTTTAACTTTTTTTTTTTTCTTCCCGAGACAGGGTCTGGTTCTGTCGCCCAGGCTGGAGTGCAATGGCGCAATCTCAGGTCACTGAAACCTCTGCCTCCTGGGCTCAAACCATCGTCTCACCTCTGCCTCCCAAGTAGCTGGGACTACAGGCCTGCACCACCATGCCTGGCTAATTTTTGTATTTTTGGTAGAGATGGGGTTTTACCATGTTGCCCAGGTAGGTCTCACACTCCTGGGCTCAAGTAATCCTCCCACCTCAACCTCCCAAAAATGTGTTAGGATTACAGGCATGAGACACCATGACCATGCCCAGTGTAACTCGTCTTTTAACTTAGTTTATGATAGCTTTTGTCAGATGAAATATTTTTTAGTAGCAAAATCCATCAATCTTATAGTAACTTTAGGAATTAAAAAGCAAACACCTGTTTTAGAGTTCCTGGTGCTTCATGCTCAATTTTTATTTCACTTGCCTTAATTTTAG|GATACTAGCATCAACACAATTTGAACCCACTGCAGCTAGAATGGCCTTTCCCTGCTTTGATGAACCTGCCTTCAAAGCAAGTTTCTCAATCAAAATTAGAAGAGAGCCAAGGCACCTAGCCATCTCCAATATGCCATTG|GTGAGTCTGCACTCCTGTGTATTTTCTATAGGAAAATCTACTGATTCTCTGTGACTTGCACTAGCCCAGTGACAGTCAACATTGGGTCACCTGTTTTGTTTTATTGCCTGGCAGATCGTTACTAACTTTTCATTTATAACCTATGCTTTTGTTTCAAGCCATAGTTATATGTAATCAAAGTAAAAATTGCACCTAAAAATGCAAGATTTCAGTAACAGTGCCATTCCAGGTTATACATGCTGATAGGAGGGAAGTGGTATAAGAAATTCAGGTCAGGTTTAAATATTAGTGCCCTTCACAAAGCACTTTCACCCTCATTTTCTCATATGATCTTTTAAAATGATTTTAAAGGTAATGCTTTGTCAATAAGGCCATATTTTTAGCATACAGTTATTTTCTCTAAGTTACATATACTATATAGTAATATTCATTATATAATTTGTAACTGAATGTACAAAATTGGGCAGACAGAAAAAGAGAATAAAAGTAATCTTTTCCAAATATTATGGTGCTGAAGGTAAGTCATGATAGATAGCTTAGCTTCCAGAGGGAAACTATTATTCCCCAATCTCAATGCGGATGTGGACAGCATTCCCTCTGATTTTTAAAAGTGACTAGAAGATGACCATGCCAAATGAATAAAACTGTTCAGTAAGTGCCATCATCCTTTGATTCTGGTAGTTTAGAAAAGCATCAGCTGGGCCGTCATTCTGCAGCTGGTATATAACACCTCCTGGAAGCACATCCTTTGTTCAGAGAAACTCACTGGGGATCAGAGTCAGAGTAGAATAGGCTTTGCCTAGAGTCCTGAGGGAAGAACAGCTTTGTCCCTGTGCTGACCGGGGAAGCAATATCATAACATGGAGAGATACTGAGAGCCACAGACCAACCTCTAGTGTGGTGCTTCTCAACCTTAAAACTCTATACCTACCTGCCTTCATACCATAAGGATGCCTTCCTCAAACAAGATTCGGATCTCCCACCCCACGCCAGAGGGCTATCCTCTGTAAAAATCACTCTTCTGCAAATTCCTACCAGCCAAGAACTTCTGTCCCCACTCCCACCCTTGTATATGAAAAGACAAGAAACAATTATGCTATTTTCCTAATATAAATTTAATATACAGGATGTGCTTTTTCAAAATATAGTCCTCTCCCCCAGTATTGTGGTATTACTCCCTGGGACAGAAGTGAGTTCTTTAATTGGTGAATCAAAGTTCTAGGAGAATAAAGGACCAGGGATGGGAAGGAGACAGGAGAGAGACTGAAGGACCAAACAGGATTAGTGGAGAAATTTGTAGGCTTTCAGAAGGGAGGCCTGGAGCTTTGGAAGCGCCACAAAGATGCTACAGTCTAAATCCATGGATATCCAGGATCCACTTAGTGAAGATAGGAAAACTTCTTTTTTTTTTTTGAGGATAGGAAAACTTCTAATGCAATGTTGTCCCTTTGGACGGGAATACCTCTCACTAACAGAAATCTAATACGAGTAGCCTGACCTCAGGCTGCAGATATTGAGCTGAGGGGAGAACAATGGGGTCTCAAAAGATCTTTTTGGAGACCAGAAAAACACAATATATACCATTGGAACATTGAAGCTTTTGGGCATGGGGCAGAAATTAATCACATTTAAATTTGAATTAATTTAATCAGGTTATTTTCCTAATAATTAACACAACTCGAGAATGGAAATTTTTGGCCAGGTGTGGTGGCTCATGACTGTAATCTCGGCACTTTGGGAGGCTGAGGCAGGTGGATAACCTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTAAAACCCTGTCTCTACAAAAATACAAAATTAGCTGGGCGTGGTGGCACATGTCTGTAATCTTAGCTACTTGGGGGGCTGAGGCAGGAGAGTCGCTTGAACTCTGGAGGTGGAGGTTGCAGTGAGTCAAGATTGTGCCATTGCACTCTAGCCTGGGTGACAGAGTGAGACTCCATCTCAAAAAAAAAAAAAAGGAAATTTTTGTTGTAGGTAGGCAGAAGCAGAATGCATTTAAAAAGAAAAGATGATTTGGGATCCTTTATGAGTAATCCTAGGCTGGGTAGCAGAGTTGGTTTGAATGACCAAATAGTGACCAGAAGTTGGTGGCTGATGGGTATTAAGAAGGATGAGGGCCAGGTGAGGTGGCTTATGGTTGTAATCCTAACACTTTGGGAGGCAGAAGAAGAGGATTTCTTGAGGTCAGGAGTCCAAGACCAGCCAGGGCAACATAGCAAGACCCTATCTCTCAAAACAAAAAAAAAAGATGAGGTCAGAGCAATAGAGGTAAGTATTGGATTACAGGAAAAATGCCCGTGACCATGGTTTCACCCAGCTAATTCTGGCTGGTTCTTTTTCCATCTCCGTGCTTTTTATTGCTGACGTGTTAGACTTTCTTCTTTAGGGGCAGACCTCTAAGACTGTACCTCCATCAACTATACCCCACCCTTACTCTCTGATTGCACTTAAAAAGGTGATTCCAATGAAGCAAATGAAGCAAATCTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCATCCAGGCTGGAGTGCAGTGGCGTGATCTCGGCTCACTGCAAGCTCTGCCTTCCGGGTTCATGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCACCTGCCACCACGCCCGGCTAATTTTTCATATTTTTAGTAGAGATGGGGTTCCACCGTGTTAGCCAGGATGGTCTCAATCTCCTTACCTTGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACTGTGTCCGGCCCAGGTTACTTTCAGTTATACAGCAGAACAGAAGCTCTTTTAGGTACTACAGGGTTATATATTTTCCCCGTTGCGTATATGCTCAACAGCTCGACATTGCATTGCCAGATAATTCTCAAACCTGTATTTAAGGAAAAGTGGATCAGCCACATCTTGGCAAAACTCACAATTTCAGTTTTGCTTTGTCTCATCCGTGTTATCAATCCACATATGCCAAATGTGGATTTACAGTGTATTGTAAACTTTAAAATGGTAAGTTGTATGGTATATGAATTATATCTCAATAAAAAAGAAATTGAGTAGAACTGTTTGACGTTAATGTCTAAATTATAATTAGACATTGGAAAGATAACTTTTAAAGTAACTATAGAAGCGTCATTAGACAGGGTCTGGCTCTGTCATCCAGGCAGGAGTGCAGTGGCTCAATCTTGGCTCACTGCAACCTCCACCTCCCAGGCTCAAGCCATCCTTCCACCTCAGCCTCCCCAGTAGCTATGACTACAGGCACGCACCACCAGCAGGACTAATTTTTGTATTTTTTTTGTAGAGATAGGGTTTCACCATGTTGCTTAGGCTGGTCGCAAACTCCTGAGCTCAAGCATTCTGCCTACCTCGGACTCCCCAAGTGCTGGGATTGATACACTTTTAATATTATGTCTGATAATTAGGAAATTTATCATGTTCACTGTATTGGATAATTGGATTACTTGATAATTTGAATTATTCTGATTTTAG|GTGAAATCTGTGACTGTTGCTGAAGGACTCATAGAAGACCATTTTGATGTCACTGTGAAGATGAGCACCTATCTGGTGGCCTTCATCATTTCAGATTTTGAGTCTGTCAGCAAGATAACCAAGAGTGGAGTCAAG|GTGAGCCTATGACTGTCACATATGGTGACCAGCTTGTTCTGGTTTGCTTGGAACTGGTTTTAAAACTGGAAGTCTGCCTGAGCGCAGTGGGTCGTGCGTGTAAACCCAACATAAACCCAACAGTTTGGGAGGCTGAGGTGGAAGAATCACTTGAGGCCAGGGGTTTGAGACCAGCCTGGACAAAATAGTGAGAACCTGTCTCTGCAAAAAATAAAATAAAAAAATTAGCCAGGCATGGTTCCTTGTGCCTGCTACTAGTCCTAGCTACTAGGGAGGATCCCTTGAGCCCAGGAGTTTGAGGCTTCAATGAGGTATGATTGTGCACTCCAGCCTGGGCAACAGAGCAAAACCATGTCTCTAAACAAACAAACAAAGACAAAACCAAATACCAAAATCCTGGAAGTCCTGCATCCTGGGAACCTTCTCAATCTCAGGCAAACTGGGATGGTTGGCCAGCCTGTTGTCACGGATGCTCATTTGTATAGTGAGGTTCTAATAACAACAACGTGGAGAGAGTGTGGCCTGGCCTGAGTCATGATCCTGGCTTCACTGCAGTCACTTCACTGACTCTCTGACCTTGGCCCTATTCCCTCTGAAACTTAGTATTTACTTCTTTGGAAGGTATAACTTGGACTAGATCCTGCAATGGTCTCTAAGGTTGCTTCTGGTTATGGCTTTCTGCAGTTTGGAAGTAAATGTTACTATCTGGCAGGGGATTTCTGGCTATGGTAAGGAAGATAGAGCAACCTGCTTGGAATACCCAAAGGCTTTGGGCCAGGTACACTGGAATGCTGGAGAGAAAAATCTTGTTTCAAGGCACACTTGTTCCTCATTTGGGTACTGTTGCATAGTGGGCAACCTATTCAACTGTGTGCCGTAGCTCAGAATGCAAACAGGTTTTTCTGAGGGGAGGAAGGGATGCTTTGTTTGAAGATACCTTATGTGTTTGTGCTGGTTTTCACTGAGGCCTGAATAGATGGGGATTCCCTGCTGAATTGCTTTGTGTTCCTCTAGTGCTGAGATTTCTTATTCTTGTGGAGGTATCTTTACTTAACTGGGGATTTGAAGGTGACGCTTGAGACTCGGATGAAGGGAACATTCTTAATTCAGCAGTGAAACTATCAGCAAAAACACCCGCCCATTGCTTTGCCACTTATCTGAATCTCTTAGAAATGATTATTTTAGTAATGTCTAATCTATATTAATATTTTTAATTCTTTCATTTCTTTAAACACATTAAGCATACAATTATATATCTGTGTCTGGTAATTGTTTTATCTGAATTCTTTGTGTATCTGATTTTGTGGTTCGTTGTTTCTGCTGGCTCTTGCTTATGGTATCTTGTTTCCTTGTTTGTATTATGAATTATGTTTGTGAGCTTACGTTGCCTGAGTCTAAAGTGGATTATTCCAGAGAGAAATTGTATTTGCTCTTACAGAGTGTCTGGGAGTACTTACTGGTCCAGGGATCACTTTACTTGTAGTTTCCTTGAGAAAGGGTAGTTATTTCTAGTTTACCTTTACATTAAAGGCCTGGCCTTTGGGTACTAGCTTTATGCAGGGATTGTATGTCCTGTTAGACTTTCTACTTTGGGCAGGCCCTGGACTTGGTCTCTTAACTCCTGAGTCCTTCAATGACATAAGAACCAAAGCTCAAGTCCAGCTGTGTTGGGCTAGTGCCGGCAGGGTTAAAGCTGGCTGCAGTGCTCTCCTGACATCAGAGGGTCTAACTGTCATTTCACTTTGGCTTCTAAATCTTTCTTTCTCATTTGCCATCTTATAAACACATTTAAGAACACTTTATACATGTTATCCAGCATTTGTTGTTGTTTTCAGAAGGGGGATTAATCAGGAACAGTCAGTATTAATGCAAGAAATGGAATTCCCAATTATTTTCTTTAATATTGGCAACCATATCCCACAATATGAAGACATTAATGTCAGTCTTCTACACAATGTGGGGAGAGAAGCCAGTTAAGATATTTGAATTCCTTTCTGTGCCTTTCTCTTTAG|GTTTCTGTTTATGCTGTGCCAGACAAGATAAATCAAGCAGATTATGCACTGGATGCTGCGGTGACTCTTCTAGAATTTTATGAGGATTATTTCAGCATACCGTATCCCCTACCCAAACAAG|GTAGAGATTTTGCACAGATATTACACATGACATTTGATGAACACAGTCATAGATTTGTCATTATAATTGGCACATCCCTGTAGTTGCCTCAGCAGCCCCTCAAGCCACAAAAACCCCAGCAAGTGACAAACCTGCGGTTGATCTTTCTGAGCATCTCCTCACCCTTGATGAGTACAGTAACTTCTAGTGATAGTGAAGAAAGCAGATCTTCATAGAGTTCTTGAGGCATATGGCATGGGGACTCTTTTGCCTTCTGATTTTATTAGTGGGCAGACAGCAGAGGGAAGAGGCTACATTTTTTCTTTACTGGCACCTGCTTGGCAGGAACCCAGAGGATGCTCAACAAACTGTTTTGAATGAATAAATTTATATAGTGTAAGACAATCTGAATTTTCTTTCTTACACAAGCCCTTAAAACTATTGTATGTTTGATTTTTTAGGTATATGTGGGTCTTGGGCATCCAAAATAGAATGGATTATTATAATTGTTTAGTTTTTCAATTTCCAAACTCCTCATCAGAAGGTTAGAAATGGAGTCAAGAGGCCTGAAAAGGCGGGCACGGTGGCTCATGTCTTGTAATCTCAGCACTTTGGGAGGCCCAGGCAGGTGGATTACTTGAGCCCAGGAGTTTGAGGCCAGCCTGGGCAACATGGCAAAACCCTGTTTCCACAAAAGTACAAAAATATTAGCTGAGTGTGGAGGTGCACTCTTGTAGTCCCAGCTACTTGCAAGGCTGAGATGGGAGGATCACCTGAGCTTGGGAGGTTGAGGCTGCAGTGAGGTGTAATTATACCACTGCAAATGCACTCCAGTATGGGTGACAGAGTGGGACCTTGTTTCCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGCCTGAGATGCAAGTCTGACTTTGCCACATTTATAGCAAAGTGATGCTGAGTCACTAAGCCTCTTTTTCTCCATTGTGAAAGGTTCTTCCAGTCTAGAGCTCCATGATTATGCACGGGCAGCTGGTTCAAACACCTATCCATTCTGAAGATTAGTGTTTGGGAGAATGTATAGCTTAGAGACTGGTAATGTATTTTATTACTTCCTTCCCAAG|ATCTTGCTGCTATTCCCGACTTTCAGTCTGGTGCTATGGAAAACTGGGGACTGACAACATATAGAGAATCTGCTCTGTTGTTTGATGCAGAAAAGTCTTCTGCATCAAGTAAGCTTGGCATCACAATGACTGTGGCCCATGAACTGGCTCACCAG|GTATAAGCTCATTCACACTTTTAATAAAGTATAAACTACATTTATATTGCTTCTATGGGACATATAAGGCTATTTATATAATTTTTACTTTGTCTTTTTTTAATAGGAAAAATTGTTTCTCCAAAGCATTCGTTTTTATGTCTTATAATGCATGTTGAACTTTTTTTATTTTTACCTTGATTAAATATTGGTCCTGTAAATATATGTTAACATTCATAAACTTATATTGGACATCTAAAATATACTTCTTTCTGAGTGTCTTTGTTTATGGCTTATGTTGTGCTTTTAG|TGGTTTGGGAACCTGGTCACTATGGAATGGTGGAATGATCTTTGGCTAAATGAAGGATTTGCCAAATTTATGGAGTTTGTGTCTGTCAGTGTGACCCATCCTGAACTGAAAGTT|GTAAGTAGTTATTTATCCTTCACATTTGAGGTTAATTTGTTGTTTTGTTCAATATTGCTGGAAAATATTCACTAATCTTTGATTATAGAACTTATAAAAATATTTCACTGATAACTTCCTTGACAGTTTAGATATGAATCGTGTTGCAAAAATGCTAGTGAACTTTCAAGATATACCACAAAGACTTATTTACAAACTCAGTTGTGAATCTATGATTGTTTGTAAATTGTCCGATTTTATATTTCTTAATATCAAAGAAATAGAAATAATGGCAGCCTAAATGTTCCATTTCATTTTCCCAACCTTCAGGTTGCTCATTGCAGAATTATTAGGTACAGATTACTGATATCTCAATAGGACCCTACTTGCCAAATCAAATGAGTTATACTTAAGGTAACTGCACATTTGATTGTATAACAACCTAGGCTTCTGGATCACAAATTTATTGCTTTGGAACTGCTATATGGTTTATTTTTTAAATCACCACATTTAACTTAAGAAAATCACATATAGCAAAATTAAACTTATGTAAAAATTTTTTTATCCTACTCCAATGATTCTTTTCATTTGTAAATATTTATTCCTATTTTCATCTGCCTGTATACATTATTTAAAATATACATTTGTGAATATTTATTACTGCCTATTTTCATCTGCCTGTGTACATAATTAGAAAGTACACATCAATATTGCATTATAATCCTAAATATTTTCTTATGTTTCTACATGATCTTTAGTAATAAAAATGATTATGGATACATATTGTCCTGTTGAGCTAATGTGCTCTAATAAAGCTGGTTAAGGTTCTCCATTTATTTTCATTATGTTTTTAAAGGTAAGTATCTTTGCTCTAAAACAGTACAGACAATGATTGGAAATGTTGAAATTACTATACAGTTAATTTCTTGTTGTGTTGCTGTTTGGCTATAGGCATAATTGTTTCGTTTTTAGATTAATACGAAATTTTCTTTATTCTAAAGGACTTAAGATGAACAAAATCTATAATGTTTAACTAAAATCATTGTTTCTTGGGTAGCTTTTAGAATATATTAATTCTATTTTGTTTCATAAATATTAGGAAATATGGAATAGGTTGCCCCCAAAATGTGAAGTATGGACTTCTTGCCTCAGATAAAATGTCCCACCTTTGACATTTTTTATCTAAATGTAAATCATAGGTGATGTTTTCTTTTTTCTATCTCAATAG|GGAGATTATTTCTTTGGCAAATGTTTTGACGCAATGGAGGTAGATGCTTTAAATTCCTCACACCCTGTGTCTACACCTGTGGAAAATCCTGCTCAGATCCGGGAGATGTTTGATGATGTTTCTTATGATAAG|GTAAAAGTAGATTGAGTATAAGGATACAGTTTAGATACTAAAGTTATACATACTGGGGTGGAGAAGTTATAGGCAAGGTTGTGGGGTTAAACCCAGATTGAATGCCTTCTCTCTTGACACGTGCTGGCTGGAGTCAACTCTTTTAGGACTAACTTGCAGTTTGGCTCATAACCCTAAAGATTATTTTATGGGAAATTCTTCATATATTCTTTCTTGGGTTGAAAATTCATGGCTTACAGAAACTCTGCTTTTATTCATCATTCAACAAATAGGTATTGATTGAGCATCAACTTTGTACTAGACAAAAATTCCTGCCCTCGTGTAGCTTACTTTTCAAGGCCTTCAGCAATGGTTAATATTGTTGAGACGCAAATAATTGTCTTGCACAGTGTGCTTAGTAACAGAGTTGGGAATTGTTGACAACAGTTTAATGATTGGGAGATTTTATGTAAAATCCAGATTTCTAGCTTCTCTTGGGGAAGAAAAAGGAGGATTTGGCCACTGTATGTTCTTTGCAGCATAAGCTGGAGCTAAGTTGTTGTTCTCTCTTTGTAAGATCAAGGCTCTGCTTTTCCACTTTCCCTGCCATTCCCAACTGTTCTATTGTCTTCTCACCGAGGCTGAGAGTGTGTTGCCAGTTACCATTGTGCTTGGCTGTTGTTTTACCGGTAGCAAACAGAAAAGTCTTTCTTGCTTGCATGTTTCCATTGAAAGTCCAGGGGAAAAAGAATGTAAAAGAGCATTCTTCTTATCCTTGGACTACTTCCCTTATTTATATGCCCTGTCATGTGCCACTGGAGGCATTTGAGTTTGTGACTCACCATCCGTGGTAATGGGAGTGGAGGGGAAAAGAGCCCTTTACCAAGGAATACAGGGTGTCTGGGAAGACTCTTGTTCCCTTTCTCATTGTGACTCCAGCTCCATCAGCCCTCCATGCTCAAGGCTGCCTGGGCTCCCTGGACATATCCACTTTTCCTTCCCTGGCATCTACCTCTGCCTCCATCTCTAGTGCTCCACCCCTTGTTGTACTGGCCTCTCCTTAGTCCTGCCCTGGAATGGCAGTGGGAGAGCCAGGTAGTAGCTCAAGGTCCAATGTTTAATCTGCACCATTATCCCCACTCACATGTGAACAAAGGGAGTTGGCAGATGATGCTAATTTGCCCCATCGGGAGGTCTGGCTACTGATAGAAAATAAGGGCCTCAGTGGGCTCAGAGCATAAGCAATCACATTAGACAAATCTCCTGCCTAAACAGGTCCAGGTTTAACCTGCTTACTCTGTTTCACAAATTGCCAGACATTAACAGTGTTCCTGCAGTTGCGTTTTCAAAGAAATGTGTTTTATTGCAAAAGAATATGTGATTTCAGATGAGACTGCAATGAAACTATAGATAACAATTATTTCTATTATCTTTTCAG|GGAGCTTGTATTCTGAATATGCTAAGGGAGTATCTTAGTGCTGACGCATTTAAAAGTGGTATTGTACAGTATCTCCAGAAGCATAGCTATAAAAATACAAAAAACGAGGACCTGTGGGATAGTATGGCAAGT|GTGAGTATGTTTTTGAATATCTCTGCATTTGGGATTGACAGGCTTATCATCTTGTTTTGTTTTCCCTGCATTATGTTAATCCCTCTGAGGAGAATCATTGTTTTCTATAGAAATAAGAGTGATGTGTTTATTTTTGGTTTTTAG|ATTTGCCCTACAGATGGTGTAAAAGGGATGGATGGCTTTTGCTCTAGAAGTCAACATTCATCTTCATCCTCA|GTAAGTTTCTATATCTGTACATGTTCCCCCAAGCACATTCTTTTACTGCATATTCTTTGAAAGGCAGCTCTGTGCCAAACTTTCTGAGGTCCTTGATTATATCACCCTCATTCCAGATAAGACTGCATTTAAACTATTCCATACTCATAATCTTTTTCAATTTTTCTTAAAGTGTATCTATACTGGAGGGTTGCAGAGCTTTCCTTGGTAATGCTTCTCACTGATACTAATTTCTCTAGCTTCCCTTTTAAAGCAGTGGATTTATGACATGTTTCTATAGCAGATTACAGCTGCATTGTAGCAGTCAAAAGGATATGTCAGTCATTTACAGAGCTCTGCATTTGTACAAAGACAATGGCACTGAGCATTCTTGAATACTTGTCATGTGTCAGGCACATGTTAAGCACTTATATGTATTATCTCCTTTACTCTTCATAATAAACCTGTGAGCTGGGTACTATTACTATCCCTATTTTTAAAGTTGAGAACATGAAATACAAAGACATCTGATTGGTAAATTGCAGAGCAGAGATCTGAATTTAGATCTAACTCATGTTTTTAACTGCTAAGCTATAAAGTATTCATAACATCAAGTCACAAAACAGCCTGAGTCTCTGTTCATCTGGACTTGTGGGATGTTTTCAGAGGAAGCTGAGGGTGAGTCTGGAGATACAACAGAATTTTTTGTTTCTTTTTTCTTTTTATATGTTTACTTTCTTGGCTTTTTTGTTCCTAGAGACTGCTTTATTCAGTAGTTTCTAGATTTGTCCTGTGGCACATTTTATCTTTTTACTAATTCTTTTTTTTTTGAGACAGAGTCTCACTCTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTCGGCTCACTGTAGCCTCTGCCTCCCCGGTTCCAGCGATTCTCTTGCCTCAGCCTCCCAGGTAGCCGGGATTACAGGCACACGCCACTACACCCGGTTAATTTTTGTATTCTTAGTAGAGATGAGGTTTCACCATGTTCGCCAGGCTGGTCTTGAACTCCTGACCTCAGGTGATCCACCCAACTCAGACTCCGAAAGTGCTAGGATTACAGGCATGAGCCACCGCGCCCAGCCTTTACTACTTCTTAAATCATTTCTTAAAAGCTTTTTTAGTATCAAAAAACAGCTCCTTTTGAGTTCCCACTATTTGTTGAGTGTGGGTCATCTCTGTGTTCTCACTTTAGAAACACAGGCTCTTGACTGAACATTGTTTCCACCTTGCTTGTCCAAAACCAGCATAGTTAGGTAGGTATTGAAAACCTGGCACTTTCTCTCCCTTCTCTTCCTTCATTCATTCACATGCCTGCTTTGTGCCCAGTGTTATTGCCAGCCCCAAAGTGTGCCCGGCAGAACTAGGTATGCTACCTGTCCTCCAGACACGTATGTGTAGTGGAGGAAAATGACAAGCAAACAGCTGGTGCTCTAATGGAGGTGTGATGGCTGGGGACATCACACTGAGAAAGGGATCAGAAAGTGCTCCTAAGAGGGGAGGTTGGCAACTGGTAGCGGCTGTTTGAGCATAAGCTGGTGTTCTCTCTTTTCTTAGAGTTGGGTTAAATGGGTGATGTGTCTGCCTTTTTGTGTACACACCAG|CATTGGCATCAGGAAGGGGTGGATGTGAAAACCATGATGAACACTTGGACACTGCAGAAGGGTTTTCCCCTAATAACCATCACAGTGAGGGGGAGGAATGTACACATGAAGCAAGAGCACTACATGAAGGGCTCTGACGGCGCCCCGGACACTGG|GTAATGCTCCTAGAGTAAAATTTGTTTTGTTGTCTAGGTAACATCTGCCTTGTAGGATGGAACCTTGCTTTTGAAATAATGCCCTTACCACTATTGCTAAAATATTTCAGCTGCATCTGTGTATCCTATGAAGTTGACTTATACTCCCTGCCCCCATCTTCCCAGTAGGATTAAGGAGGCTTTAAACCTTGGTTATTCTCAGTAAAGGTGACGATGTAATTACTTTAACATTCTCATATTTTGTAATTTGATATGATGGTAATTTCTGGTTACTGGCTTGAAATCAACTCCAACCTAAGCAACTGCTACTAGATTACAATAGTGCCTAGCATTTGGTTGGAGCTGAGGACAAAAGAATTTAGGTGATTTCCGAGAATGATGAGAGACTCAGTTGTCTTCTTCTGAGTTAGATTTGGAACCTGTTTGTCAGCTTAATGCTATAGAAGAATATTATTAGAAACAAGATGCTGCAACTTGATTGACCCTGGATGGATACCGTTAAAAAATTCTTTTATCTTGAAACAATTTCAGACTTATAGAGAAGTTACAAGAATAACACAACAAATTCCTATATATCCTTTACCTAGATACATGTTAACTCTTTACTCCTTTACTTTCTTCACCTCCCTCTCTCTCTTGCTCTCTCTCTCTCTCTATATATATATGTATATGTTTGTGTGGATATATGTGTGTGTGTATATATACATATATATGTGTATATATATACACACACATATATATATATAATATATACACCTCCCTCCCCCATCTTCCCAAAAGGATTAAGGAGGTTTTAAATCTTGATTAATCGATATCCATCTATCTACATACATACCTATATACATATGTCTGTATGTATATACTTGGCATATGTTTTTCTGAACTGTTTGACAGTAAATTGGTGACATGATTCCCCTTCACACCTAAATGTTTCAGCATAGTATTTTCTACAAAACAACATTTATGGCCGGTCACAGTGGCTCATGCCTTTAATACCAGGACTTTGGGAAGCCGAGGCAGGAGGATCACCTGAGGTTAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACACCATCTCTACTAAAAATACAAAAATTAGCCGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCAGGAGGGTGAGGCAGGAGAATCGCTTGTACCCAGGAGGTGGAGGTTGCAGTGAGCCGAGATCGCACCACTGCACTCCAGCCTGGACAACAAGAGCGAAACTCCATCTCAAAACAAACAAACAAAAAACAAGAACATTCAGTTATATAACCATAGCCACAGTACAATTATCAAAATTGGGAAATAAACTTTATATAATACTATTATCTATAAATCCCATTCAACTTTTTCCAATTGTCCTAAAAATATCCTTATAGCAAAATAGAAAATAAATTCTGACTTAGGATTTAATACAGGATCACTTATTTTATTTAGTTGTTGCATCATTTTGGTCTCCATTTTTTTCTGAAACAACTCTTCAGTCTTTTTTGGGCTTCTCGATATTGACATTTTTTGAAGAGTACTGGCCAGTTATTTTATAGAATGCCTTTCAGTTTGGATTTTCTGCTGTTTCTCCATGATCAGGTTCAGCTTACATATCATTGACAGGAATCCCAAGGAGGTGATATTGTGTCCTTCCTAATATACTATTCAGGAGGCACATGAGTCCTGAAGTTAGGCCTCTGAGATGCAATCAGCCAATCACTGTAAAAAGGCTTTCAGGGTTCACGTGAGTCCAAATTCTTAGCTCTACTCAAGGAGTCCAAGAAAAGGAAATAGTGCCCGAGCTGACACTCCCTACAAAGCTGGAAAACAGACTGGGGATGTTTTGAGAGCTTGGCTGTAGCAGTTTTGGTACTATCTTGTTCTAAACTAACCAGTGCCTGGCACATGATAGGTGATTTAATAACTGCTTGTTGAATTGAATCAACAAATGAACAATCCACCTCTCCTTTATTTTAG|GTACCTGTGGCATGTTCCATTGACATTCATCACCAGCAAATCCGACATGGTCCATCGATTTTTGCTAAAAACAAAAACAG|GTAATTTATTTTGGAAACTACTAGTTAATTCAAGGAGGAATGTGAAAATGTGTAGGATTAAAGTGCTGTTTCTTTTGCTATTTATCTTGGTTAAAGCAAAAGGGATCAGATTGAAAATGCTTTCTGGTTTCAAAGAAGAATGGCCAATATCTTGCCAGATAGAGATTATAGCTGAATTATATAATGCTAAAAAGGAGGGATTACCAGTGAACATTCACTATTACTTTGTTAACATTACTGGGTTATATCATATGTATGGCCAGAGATGTAGTTGCCATTCTTTTCATAGATCTGTAAATCTCACAAAATTGATGTCTAAAGCTAGTGTTAGCTTTTGTCTATTGAGTTGCTGTTTTCTGGTTTCTGAGAGAAATAAGTGATGTTTTCAAATTTTTCAGTATTAAACTCTTCTCTAATTTCCTTGCCCAATCTTGGATGAGAAAAGCTTTCTTCAGGCAGGGGAGCAAATGGAACTTTGATTTATTTTATTTACTCTAG|ATGTGCTCATCCTCCCAGAAGAGGTGGAATGGATCAAATTTAATGTGGGCATGAATGGCTATTACATTGTGCATTACGAGGATGATGGATGGGACTCTTTGACTGGCCTTTTAAAAGGAACACACACAGCAGTCAGCAGTAATGATCGGGCGAGTCTCATTAACAATGCATTTCAGCTCGTCAG|GTAATACACGCTGCACAAAGTCGCGGTTTATTTCTGAAAGCAGCTGTTATTGTTCAAATTCTTGATTTCTAAAGACAAAAATGATTGATTGATAACAAGAAGTTGAAAGGTGTTTTCCTTTAAGTTCTTCTAACAACCCAAGATTGCTTTTAGTCTTATAATTAACCTCCTGTCTGTGACTACTAAGGCACTTGAAGGAAGGAATCTGTATCTTAATCTTTCTCATTAATAATTCAGATCATACTGAGGCGGAAGTGTAGATAACCAGGAGATCAACATCCTGGCGAAACTCCTTGCCTTGTCCCCGCGGCATGGTCCCGCAGCTTCTTCTGGCCCCGCTATACCGGATCAGGTTTCTCCCACCACTGGGTCTTTATAACAGCCTTGAAGTCTTCTTAGAAGACTAAGTGACACAGGCCCAAGTTGTTTGCATTACATTTCTTGGGTTAAAGAAGGGATTTTTTTTTCTTTCCTAAACCAGATACCTGAACTTTGCAGCATCTTTATGGAATATAGCTCATAAAATGTAGCCCAACCAAATGATCTCTGAGTGTGTGAAGACAGAATTAACACTACCCTTTTTTTTTCCTCCCCCATGACAATGGTTTTTAAGGAAATGCTCCCAAAGCCTAAAACTCAAACTTCTTCAACATGTAGGCAGACCTAAAGTCCTAAGAGGAAGCATGCAGGTGGGAAGGATTATCTCTTTCTCCTCATACCCGTATCCTTCTGGCCTAAATTTTGAGTGCCTTCTGGTTCTTCTCACCACCATACTGGGCCTCCTGAAGTGAGAAAAAGCAATGGGGGAGAAAGTTATGGGTTGCTTACCTAGCTGTTTCTTTCTGCCTAAAAAATTCCCCTCTGTAGAATATTTCTGCTTGAGCCTTAGAGGGCTTCTTCTTTTTTATTTTTTAACTTATATAATATATACCCTACCACTGAGGTTTTTAACACTAACCCTCTGTAATGAAGGGTTTCTAAGGATATGCAGTGTCTTTGAAATGGAGAAGAAAATGTGTTTCCTCCAGGAACTCATGGTTTTGACCAGATGATAGTCTGTTTGACCAGGCTTCCTAAAGGGTCTTTGATGGAAATCCCTTGCTTACTCTAGAAGTTCACCACTCAATCCAGTGTTTCACAGGGATAGATGCTGATGTGCTTCCTTTCCGGCTTATCCACGTTAGCCCTAGACGTTACTGTTTGAGAAACTTCCTCCCTAGTGTAAACATGCCGGATGTTGTGTTATATGTGATAGAAAAGAGACTTATATAGTAGTTATCTATCTGTTTGTTTATTTATTTTTTGGAGATGGAGTCTCACTCTGTCGCCCAGGCTGGAGTGCAGTGGTGCCATCTCAGCTCACCGCCTCTGCCTCCCGGGTTCAAGTGATTCTCCGGCCTCAGCCTCCCACATACCAAGTACCTGCCACCATGCCTGCCTATTTTATGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCCGACCTCAGGTGATCCACCTGCCTTGGCCTCCTAAAGTGCTGGGATTACAGGCATGAGCCACCGCGCCCGGCCGTAGTTATCTATTTATAAGGGACATTTATATCTGATTGCTTGTTTTGCTAGAAGATTAAAATATATTTTGAAGCAAAAAGTTGACTTCCCGGATTTACTTTTAAACCACTAACCACAGTGTCTCTTGGTCAG|CATTGGGAAGCTGTCCATTGAAAAGGCCTTGGATTTATCCCTGTACTTGAAACATGAAACTGAAATTATGCCCGTGTTTCAAGGTTTGAATGAGCTGATTCCTATGTATAAGTTAATGGAGAAAAGAGATATGAATGAAGTGGAAACTCAATTCAAG|GTAAAAGCCTGAAATAAAAGTTATGTAATTATTATTTGTGTTAAAAAGTGTTAATCATTGTGTGTGTATGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTATCTTTATATACATATTAAGGAAAAGCAAATAAATATTAGTTCAGTATTTAGATGGACTAGCAAGATTTTGGTTTTATTTGGCACTAAAAATAGGAATGGCATTCTTAGACTCTATTTAATTTGCATGGATTTGTTATTTCCTTCCTTTCACAAACCTTTTATTTCATTTTCCTTTCCTATTACAATGGCTGACGTTCAGATTTTCTGGGACTCTTCATGGTACTTGAGGAAGAGGCACAAAATTGTTATGCTTGGCAAAATGTCCTGAAGTCTTGTTGCATAATTTGCTCTCAAAATTTAGTCATGTAAACTCTCTTTGAGTTTGTTTTAAAAGTCGCAATTCATCCTGACTTTAAGTAGATGGATATCTTATAAAAGTTTGTTATGAAAATATGAAAGTCATTCATCATTATTTATTGTTTTTGGCCCTTTTATAATTAAACAGTGCTGCATGATTTATACAGTAAAAGTTAGATTATGCTTTAAAAATTAGCCCCCATCATCTGAGACCATAATGGATTATATTAACATGAAATGACCTGTGACAATACTGGTCCCTGTTTCCCTGTACAACGCCCTCAG|GCCTTCCTCATCAGGCTGCTAAGGGACCTCATTGATAAGCAGACATGGACAGACGAGGGCTCAGTCTCAGAGCGAATGCTGCGGAGTCAACTACTACTCCTCGCCTGTGTGCACAACTATCAGCCGTGCGTACAGAGGGCAGAAGGCTATTTCAGAAAGTGGAAGGAATCCAATGGAAACTTGAG|GTCAGTCCTTACTAAATAACCAATTTGTTGATGTGAAGGGCATCTTTTCTGTTTTCCATCATTGGTACTAAACATTAGGGAAAACAAAAAGTTTAAGTGTCTCCCCTGCTGCCCTTTTCTGGAAAATAAATTGCTTTTTAAGATTTATCTATGTATCTCGTAACTTTAAGAAATGCTAGGAGGGAACTTCTATGCATAAAAGTCAAATATCTGGGGAGTTAGGATGGTATGGGAATAATTCTCATTTTGTATAGGCAATGCAAAATCTTATTAAACGGTGACAGCCATGCCATAGAGAAACACATGCATTGTATTTTATAGTAGTTTCTTTGGGCTTCCAAATTCCCTGACAGGGATGCATAGTAATTGCTGACACTGTCTAAGCAAGGTAGGATTTTACCGTATCTGGAAATCCCTATTCTTGCTACACTTCAGTCCTTTTTACATTTGGAGCTTAAACCCCACCCAGGAAACATTTATGCCAACAACCTACATGCTTCAGCTTAAAGAAATCCAAACAGTGGTCCTACTGCCGTCCTGACTTGTGATCATGGTGTAAATTTTGAATATAGTTTGAATTTCTTTTGGCTTGAAAGGTGATCCCCTATGGAGTCACCCAGCATCATTAAAGTATTTAAATATGTAGGTATTTATAAAAATGGCATTTCACATTTTTGAGAAGGCTACTTAAAACCTAATTTTAGATATTTTTCTCTTGCCTTTTTTTTATAAAGTGTAACTAGGCCAGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGTGGATCACGAGGTCAGGGGATCAAGACCATCCTGGCTAAGACGGTGAAACCCCGTCTCTACTAAAATACAAAAAATGAGCCGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTAGGAGAATGGCGTGAACCCGGGAGGGCGGAGCTTGCAGGAACCGATATCGAGCCACGGCACTCCAGCCTGGGTGACAGAGCAAGACTCCGTCCTAAAAAAAAAAAAAAAAAAAATTGTAACTGAAGGAAATCAGTAGTTTTTGTTTATATTTCCAGGCATGCTTCTCTGATCATGGGGAAAGTTCAGCAGTTCATAGTTAGTCACCATTATGCCTCTAAACATACTTACTGCACCTCAGATTCTCACCGAGTGTGTTCCTTTCTATAG|CCTGCCTGTCGACGTGACCTTGGCAGTGTTTGCTGTGGGGGCCCAGAGCACAGAAGGCTGGGATTTTCTTTATAGTAAATATCAGTTTTCTTTGTCCAGTACTGAGAAAAGCCAAATTGAATTTGCCCTCTGCAGAACCCAAAATAAGGAAAAGCTTCAATG|GTGAGTCCGTCATTCATTCATGTTCATGTGGCCAAGGGAAATTAGATTAGATTAGATCATTCTGGCATCTATTTTTGTTTTCTTGGCCAGGAATTGCCTATCCTGCTGGGAACATACTGCAAGTCAGCACACATCTAATGAGAAAGGCAAATAATTAGGGAGAACCAGGTGTTAGGAAAAATATTTAAAGGTGTATACATTTGTGCATCTTTAAAATGACAACATATTAAAAATATTTACTAGAGCTACTCTTCCTTAACTATTCTTTTGAGCAAATGAGAGGGTATATGAGAATGGGTGTTATGAACACTATATAAATGTTAGTTTTTATTATGTAGCTATAGTGGGTTTATCAGGATTGTGGTTACCCATTTATCTTTTTAACCTTTGTTACTGGCAGACTTTTTTTTCTTTTACAAGTAATTTTATTGCAGTGATTCATAACTGTTTCATCACCTATTCATTGCTAACATTTCTTTTCTTAATCCTTCTAG|GCTACTAGATGAAAGCTTTAAGGGAGATAAAATAAAAACTCAGGAGTTTCCACAAATTCTTACACTCATTGGCAGGAACCCAGTAGGATACCCACTGGCCTGGCAATTTCTGAGGAAAAACTGGAACAAACTTGTACAAAA|GTAAGTGGTGCCAAAAATTGTGCTGTGACTGGATAAGTTCATAACCTTACTGTGTTTTAGCCTTGCTGTTTGTAAAAGAACAGTAACAGTCTAAAGGTACTTTTTGATTGAAGATAGGCAGTAGAAATACCTAAAATATTTGTAGAAAACATAAAACTGGACTTCAGTGCTAACTAGTGAATCTGGACAGGGATGTTTTCCATTCCATCTGGCATAACCCCTTCCTGAGCCCATGGACATATCTGAAGCCTTCCTCCTCACAGTTCAGCCCAGGCCTTCCATGAACACATTTGCTTGTTCACATCTGTCTTTGTCTAACTCTTATAGCATTTCCTGCTTCTGTCATTTTCTGTTGGATACTTAACCTTTTATTAGGCTGTTGGTGTGTATTATTCTTTACAGCTAGATCTTAACCCATTGGATAGACATCATATTTTGTATTTTTCACACCGATCAGTTTTTAGCTGAAAGCTATTATATATAGGAGGCCCTTAAAATATATGTTAAATGAATAAGTATTTCACAACCCGTTTTTGAATATTTCCCTCTCTAG|GTTTGAACTTGGCTCATCTTCCATAGCCCACATGGTAATGGGTACAACAAATCAATTCTCCACAAGAACACGGCTTGAAGAG|GTAAAAAAAAAAAATCTATATATATATTTTTAAACATAAATGAAAATTAGCTAATTAATATGGGGTAGACAAAATACTTTGAGGGTGTGGTGAGTTAGAAATGGATTGTCATTTAGAAGTTATTTTTGGATGCTATGGTGTTGACAGCAGCATAAATCAGTTGCAATTAAACTAGTGAAAACTGTGCCTCTTCTCAGACACGTTAAGAGGTCTTAGCTCTGCCAGTAAAAACCTTAGGACTTGAAGAAAATTACTTGAGACAATTAGTCCTTGTTTAAGGATGTTAAAAGTGGGCACTGAGGTATAAATGACTGAAGTGGTCATCCAGCTGTTGCTAAAGAGGCAGGTCCGGAATCTAGGTTCTTTATAAACCATCCAAGTTCATGGTTCCCTGTCACTTATTATGTAACCATCTACCTTCAGAAAGAATTTCAGGCAGCTTTAAGATCCATGTATAAAAGAAATGTTAAAATGAAGGACAAAAAGATACATAATGGACAGGTGATAACCATGTGAGGGACTTATGGCTGAGGATAATTCTCGCAATTGTCCCCTGAATTTATCACAGACATTCCTGGAAGTCAAGGAAAAATGAGGAATCAGTATGAGTTATTCTCATTGTCTGGTAAAAGAGAACATGAAGCATACACATTTTCTACAAAAGCAGACTTTTCTTCAGTCCTAAACTCGAGGACTTTGATGTGTGGGCCATTGAGTTCTGTGGTGTCCCTTCTATAATAAGGTTTCCTATAGTAAAGTTGTGGAACAAATTTTACAAGGCTCTTAAACTGTGGTACATTTAGATAAAAGCTGAGAGGCTAATATTAATTTTTTTAGGAGTGGTTCCATTGTGTAGTCTGGTTACACTATTTTCTAGCTTGAGATGGGGAGAGAGCTTTGGAAATGAAAAAGAATGAAGGGTTGTTACAGTCTCGTAGCTTTTTTCCAGTTTTCAGTAGCCTCATCCAGGCTTGTAAAATTAACTTGCATATAATAATTCAAGCTTGAACTCCAGCGAGGTCTAGAGCAGAGATACTCTGTCTGATGACTGAAGAGATGTCCAAAGCCTTGACCAGAAAGGTAGTCATCTATAGACAAGATCTATAGCAACAAAGCATTTCATTCATTCACTTGTATTCTCATTAATTCATTCAAGACATATATGCCACACATGGTTTTAGGTTATGGGGCTGTAACAGTGAACAGACAGACAAAACCTTTGCTGTCACGGAGCTGAAATTTCTGCTGTGGGAATCAGACAATTACAGACAAATCAACCAATGTCAAGTAGTTACATGCTCTGAAGATGAATAAGGCAAGGAGAGTGATTGGGATAGAGGGTTGGTGCTGTTTTATAGGATGATTAGGAGACATTTGAGAAGATATCGAAGGAAATGCCAACATCTAGAGGAGTATTGTGTGCAGAGGAAATATCAGGTGCGAAGATCCTGAAGCAGGGCATGAAAAGTAAAGAGCCCGAGATGTTATCATAATGGAGAGCAAACTACGGACATATAAAATACGGGAGTCATCCTGAATCCTTTCTTCCCCAGACATCTAACTACGCAAGTCCTGCCGGTCTTCCCTCAGCACATGCACTGGATCTTGCGCACTTCTCTCTGCTCTTACCCCCATTCAATCCTTTCTCATCTCTTGCCTTGCCTATTGCAATAGTGTCCTAACTGGCCTCCCCCTTGCCACTTTGCCAATAGCAAAACATTTTCCACATTGAAGACTGCAAGTCAGCCCATAACTGACCCTGGCTTCTTGAAACCCTGCCTATTGTTCTTGCAGTAAATTCCAAATCCCTCCTTCCCATGTGTCCATGACCTTGAAGACCTGGCTTCTGAGGACCTCTCTGGTGCCAATTCCCACAGGCGCTCCTGGCTTACTCTCTCATAACACTGTAGCCACTCAGCCTCCTTCTGCTCCTGGGCTACCACATGTTTCTCCTGTCTTTAAGCCTCTGCACTTTTTATTCTCTCTGCCTGGCTTGCCCATTCCCCAACTTTTCACTAGAAAGGGGCATTTTATCTTTGAGAGGTCTCGGCTCAGATCACCTTCCCCAAAACCATGCTGTTCAATGCAGGCCACATCTCCTCAGTTACTAGCATACCCCTGGCATATTTTCTTCATAGCAGTTTATTTACAGTAATCATATTTACTGCTTGTATACCCCAAAAGACTGTAAGCTCCATGAGGGGAGGATGCTTGCCTGATTCACTGCTGCATCCTCAGTGTCTAGCACAGTAATTACAGCAGAGGGATGAATAAATATCAGAGTAAATATTGGTGAACAAATGGATGAGGTTAAGTCCCTGATACATGCTGGGTTTGGGGCTTTGCACTTTACCTGCATTATATAACTACATCCTTCACCACTGCTCTTCATTTTACAAATGAGGAGACTGGCTTCCATAGAAGTTAAATCATTTGCTCAAATCAAGTTAGTAGGATAAACCTGTTTTCCTATCCCTGTATTGTGCTACTTATTGCACAGAAATTGTTCTTAAAGAGCCAAGTCTGAATCAAATATTCAGTGGAGATGTTGCATTTCCAAGGCAGGTGAAGACAGAAGAGATGATTTTGGGTCAGGACAAGGGTAAGAGTAATGGTTAACAGCTGGCTTCGCTGTTGTAGAAGGTCTGTTTTTAGTGGAGCTATCATGATGAGCTCCTTTAGGATTAATGATTTGAACAATTAGTAAAATTATTGATGTGCTTTGTTGAGTAGTGCTCAGTGATTTTTAATCGACTGTGAATTAATCTTGCATTCTGAGAGCGTATGAAACAGTAGGTTGAGCTACAACTTAAAGTAGAGAGTGAAGTGTTTTCCAGGTTGCTACAGCAGGAGGCCATGCCTTCTGCTCATTGTAAAGTTGCACTGTATTGACATATAATTTTAAAACAACTCTGCATCATTTAAAATTGAATTCTGATCTTTCTAAAACCCATTCCTACTCCCCTCTTTATTCCCAAACTAATAATATGGTATTATGACATGGATTTCTAAGGAACTGGCTGTCTGGAATCTATGCTGAATAAATAATACATCATGGTCTACATTCGCTTCCTGAATACCAAATCAAAAATTGATGGATTAATGCTGTGAAAATTTATGGGAAAAGGATAATAACCCTTTAAGGTGAAACAGAATCGCACAATGGTCAATCTTGTGGCAAAGCCAGCCTATCTGTCATGTGAGCTCAAGGGGTGAATTAAGATACCCACAAATGAAACAAGAACAAACATATTTTTCAGGAGGTAGCCAAGAGTTTCTTATCTCTTTAAAATCATGGGGAACTTTATGTGGATTTTATCTTGAGACAACAATACATGAATTGTAGATTAGGATAAAAAAAATGGCAAGGTTTGGGTCTTACCGCCATAACTTGCTAAGAATCCCATTCCCCACAGTGTTTCTATAATAACATACAAGGCCAGGCCCATGGAGAAAGACAGCACTTACGTGGTGGAAACTGTTTTGCTTGGCAAAGAAAAGACTCTGCACATTCTGCTTTTTAGATATCATGTTTTTAAAAAACGAAGTTTGCATCTGTGAGAACAGAACAGAATAACACATTAAATAGACACAATTAAACCTTAATTATAAAATGGTAAACAGTGAAGTCTTTTAAAGTCTGGCAACTGAGAATAAACAAAAGAACTGCAGACAAAAAACAGAACTTGACATCATGAGGCATGAGCTCATTTCATACAGCTTATGTGTACATAATCCTATTCAGACAGCTGGGACTGCCTTCTATATAGAATTTTGACAAATGCTGGAATTTTGGCTTCAGTTTTAACTAAAGTTACATCTGATTAATGTGATAAAATTAATTTTTTAAAACCCACTTTTTCCTCACAAG|GTAAAAGGATTCTTCAGCTCTTTGAAAGAAAATGGTTCTCAGCTCCGTTGTGTCCAACAGACAATTGAAACCATTGAAGAAAACATCGGTTGGATGGATAAGAATTTTGATAAAATCAGAGTGTGGCTGCAAAGTGAAAAGCTTGAAC|GTATGTAAAAATTCCTCCCTTGCCAGGTTCCTGTTATCTCTAATCACCAACATTTTGTTGAGTGTATTTTCAAACTAGAGATGGCTGTTTTGGCTCCAACTGGAGATACTTTTTTCCCTTCAACTCATTTTTTGACTATCCCTGTGAAAAGAATAGCTGTTAGTTTTTCATGAATGGGCTATCGCTACCATGTGTTTTGTTCATCACAGGTGTTGCCCTGCAACGTAAACCCAAGTGTTGGGTTCCCTGCCACAGAAGAATAAAGTACCTTATTCTTCTCATTTTATAGTTTATGCTTAAGCACCCGTGTCCAAAACCCTGTACCCCATGTTTATCATTCATAAACTGTTTCATCAGTCTCCTCGAAAGACTCTGAATAGTCGACTACTGAACAATGAACACCTGGATCTGAGACTAAGCCGGACGATGACTGGGTTAAAGCTCTCCCGGCTCACCCCTCCAGACCCGCTGCCCATCCCTCTTCCTTGCTCCATGCCCAGGGGCTGACTTGTAAAGGCCAAGTCATCAAGCTTTCTTGCCCTTTGGATGTTGGTCAGTGGGGAGCCGGAGAGCTGGAGCTGGGGTCGGAGGAGGTAGTAGGTGGAGGTGTTCTTCCCTGATTCCCTTGCGGGATGCCTCGGGCTGGCCTCCCCTGAGGGTCTTAGCTCCGAGAGGGGACCCTCTTTTCCACACAGCCTTCTCCACCTCTGGATTTTGGTAACTGCTCCCTCCTCATCCCTTCAGGATTAGTGGCCTCAGTGGGAGTCTGGCTTTTACTAGTCCTGGCGGACTTGTGGTTTCTACATAATGTGCTCGCACTTTTGCAAAAAATCTTTTTATAGAACCCTCCTCAGATAATTCTGAGTGAGTGTCATCTATTTCCCTGACTGGTACAGTATCTCTTCTGAAAAAGCAGAGTGCATTCAAGTCTGTAGGAAAACCCTTTTCTTAGGGAGGTGATTTTTTTTCTCTCTCTGCTTCTTATTTGGCCTACTTTACAATTTCTAACTAACTAGTTATTGGCATTTACTGACAGTAAATTATTGCAGTCACCAATAAATGATAGTACATTGTGAAACAAAATATTTGCTCATATTAGCAAATAGGACATTCTTTGGCTTTGAAGTCTTTCTTTCTTTTGTGAAGACTTCACACACGGTTGCTTCAGCACACAGTTGCTGCTCAGGTTTTATGTATAGATGATAATAATAGAAAGCACAGTTTACTAACATGGTAAACCAACGGAGTTCAAGTCAAGTCAGTTAATACCCTAAGAATTAGATTTTATTTCTTATTCTGAAAACTTGCTACACAGGGACTTATCTAACCCATAGTGTGCTCTGTTGCTGACTTGATTCAAGTTGCAGCGTGTTTTGCGCTGACTCTAAGGTGCGGAAATCCTCACACCTGGCAAAGGAGAATTCAAACTGAACTTTTTGAATATAAGGCAAAAACTTCAAGATAAGGGAATATGATTGATGATTGGTACGAAAAATGTCAAAATGTGTTCCCCTAATACACGACAAAATAGAGTGACTTCTGGACATAAATCTGCCATTTATTAAACCATTCACTACAACAAATAAATAGGTATAAAAGTGGAATTGGAATTTTTATACTTATTTGTTGTAGTGAATGGTTTAATAAAAATAGAAATCACTGGTAATTTCCACCCCAAACTAAACTATTTCCCTTCTTTTAAAAAAATACACAACCAAGATTTTAATGTAAAATATTTTGCTTTAATTGTATTTTATGCCTTGATTAATGAAACATGGAAATATTGATTTTCAGTTTTGGTCACCTGAGGAACCTATCTTTGTTTGCTTTTGGAAAAGCCCATTTTCTAAACAGATACAATATTGCCACAACAATGTGCAGAAACCTTTTTGATAATAAAAAATTGTTCTTTGCCTCTAAGTGGATATTTGCAATTATTTTCTCTCTCCTAACTAGACTGTAAAAAGGGCTGCTTTAGATCCTGTAGCTTACTCCAGTTATTAGTTATTAACAAACACCCAAGTCTCGAAGATATTTCTAATTAAAAAAGAAGGCATATTCAGAGTTCTTTTTAAATAAATGTTGTTTACTTTTATAGGCATCTTTAAACTTCTGGATTTTGGTATGCCATTTAAAAATACTTCCAGATACACATGGAAATTAGTAATACTGAAGCCGTATCCTTGCAAACACATCTGTCAGTGTCAAAGGTTTCAAGGTTTTTCTTAAAAAAAGAAAACAAAAAAGCAAACACCTATACTGCCCAAATGGGAGGATTAGATACATGGTTAGAAATCCCTCAGGAAAAGTGTTTTTTCTTTTCTTGTTGCTGCCTTAAAAATAGAATAATGACTATTTCTGATGGATAGAGACATAGCATTTTAAGCTGGTGGTGTGTAAAATCCCATAGGTATGTGCATGACTTTCAGAGAGTATTTGGGGGGGAGGTTAACAAGATGTGGTGCCATTTATAAGCAGTGTTATTGTTTTTGCTTGCCCCGCTGCCACAAGTCAGCTAAGTCATAACAAAAGCTTCAAACTGATGCTAAGGAAGGCCATGCCCTTTGGAAACAATAAATTCCCAATCTGTTTTATGTTATGTACCTGACATCTTTTCCTGCATTCTCTACCAGGAAATAAAGATGAAATTAAATATCAAAATTCTAATCGATGATATCAGTGCAATGTTCAGGAACTATTCATTAAGATATTAGAAAACCATTCAAAGTGGTAGGACATCAGAGCCTACTTCTTACATTGCTGTGGGAGAAATGCAGGTTTCAAATTTAATATATATATATAATTTTTTTAAAAGCAGAAGTTTCTTTTTATATTTGGTAAACTTAAGTTCCATAAAGCCAGACGCTATACAGTGCAAAGGCTAATGTGGCAATAGCTCTAAAGACACAGTTGCTGCTCAGGTTTTATGTATAGATAATAGAAAGCACAGTTTACTAACACAGTAAACCAACAGAGTTCAAGTCAGATGAGTACCCTAAGAATTAATTAGATTTTATTTCTTATGCTCGAAATTTGCTACACAAGGACTTATCTAACCTTTATTTTGCTCTGTTGCTGACTTGATTCAAGTCTCAGTGTGTTCTGTGCTGACTGTAAGATGCAGAAGTCCTCACACCTGGCAAAGAAGAGTTCAAACTGAAAAGGGGTTTGGTGCTTCCTGGTTTGTCCAGGTTACCTGTTATTAATTTATTATTAGCAGCCCAAGAGGAGATATGTGCCCAATGTACAATATCTTATGTTTGACTTATAAACATTATCCCAAAGCAACATCAAATACAGTTCAAAAGCCCAAGAGGAAAGGGGGTAATAAGAAATCAGAACACTGAAGAATGTTTAAAACATTGTTTTCTAAACACTAACAAAAAAAATTAAGGGCAAACTGAAAATACAAATGAGATTTACAGGCACTGTGTGTAGAATGTGCAAAAATTCACTTAGCTTTTCTTTTGTTTTTTTGGTGTTGCTTTAAGAAACTTTATCAAATATATTTCTTACAAATATAAAGCTTTCTCTCCCAATTGAAGGCAATTAAAAAAATTCAAAGTTTATCAATACTCAGTACACAGGTGAACCAGTCAAATTCATTTTCTTTCTGGAAAAGAATAACAAACCAATATTTAGGATGTTCAGAGACTCAACAAAAACCATTCTAGAAATCACCCAGAACAATTGTTTTCTGTTGCCAAAGCCTTTTGTTCTTCAAAAGTCACCATCCACCAGCTGAAGATTTTACATGCAGATACCTTAAAAATTTCAAATAAAAAATGCAGTGAATCATTTAATATGTAATTTTCTTGTTACAGACATAGTAAATATACCACTTAGCAAAAGCATTGTATAACAGACAGAAGGAATTTCCTATAAGTAAACATGAAAGTGGATTAATAGAATTTTTTTTAATTTTGGGAAAAATGTTAGAGCAGTTCTACCTAATATAGCTCCTTTTTTCCTAGGAAATAAACATGGATCATGGTGAGAGAGCTGAACCCGATTTAACCTATACTTTGATTTCTTTTAGGCTTTGGTCAGTAAGTGCTTGTATGCTTTTAAGGCTTACATTAAGCCCTCTCCTTTCTGAAGATTAAGATAAGGGCCCAGTTCTGAAGATCTCAGAAATCCCTTCAATCTGTCCAGTTTCTTCAGCAATTATAAATTAGAATCAAACACAGTACTTTACTTTCCAAAATAATGACAAATAAAAATGGCCAATCTTTTCCTTTGCCCTTGTTCCCAAAACCCTGTAATTTCCACCAGACTCTAAAGGGTACTTCTCCCCACCCCACCATGTATTGGTATTCTGGGGAATGCCAATGTCTTCAGGACATTTCGCTAAGTTGTAACATGCTAATTTTGCTTTGCCACAATAACCTGCAGTAATGAGCATTTGGATTTCAAAGATTCAACTAGCCTCAGATGGTCATTCTAAGTGCCTGGCCTAATATTTTTAAAAGCTTTTAATTATAAGATTAAGGTTTTTAAACATTTCAATAGCAAACTCATCCCATTTAGTGCTTTCAGAGAATGACCAATTACTCTGTAGATCTTGCAGTATGCATTTCATGCCAATACTGTAAAGTGAGCATGAATTACTCAAGAGGTGGACTTCACTTCTCTCATCTATAACACATAAATTGGCAGAAGATACAGTTGTCTTCATTTACAAATAAACACCCAACTTACCAGATACCTTAACTTGTATTTCTTTAGTCATCTTTTGGCTTGGAAGTTTCCTCTGTTGTCTAAAAGGGAAAGCAAAACCATCCGTGAGCTTTCTTTTCTGTATTAAGTATGAGGAGATGGCCTTCTCAGAATTAGGGGACAAAAGATGGCAGTCAGTGGGGAAAAATAAGATGGTCCTTTCAGTTTCTCTTCTTCATCTGGCCACAATATTGTGAGTTCTTCCTCCTCAAGCTTATAAGCTAAAAATAACCTTAAGTGATCCTGATCCTAAATGTATCACTCTCAGCTTTATTTTTTTAAGGCTAGGGTAGGTTATAAAATATGCATAGGTGTTCTTAAATGGCAATGTTTCATTCTGTGGTGATCCTTCCTTTCCTGTACATAGGGTCATGGCTGTCCAGTAAATCCACTCATCATAAAGGGTTATTATGCTTTCTATTATTTGTTAAAGGGCTGATTAAGTACTTCGTATAACTGAAATTAAATAATAGGCAAATAAGTATTTAAACTGAGATATAATGGCATAACTGCCAATTATCAATGCTGTTTCCTATATCCCTTAAGGGAATCTACTACAGGAAGATTTTAATATATTGTTTTAAAAGCTTTGGTGTAACTGGACTATTGTCTTTAAAGCTACACCTTTAACTCCTCCTTATAGCCAGGGGATCATAGTAATAATCATTTAAATCATATGTTCTTGGAATTGGAAGGGACCTAGAAGTTGTCTAATCCAATTTTCATTTCTCATCACAGCTTGAGTAATTTTAATAATAGGAAGTTTCCAACTTCCATGTTCTAAAATCTATATAAACCACATACTATGGTGGTATTTTAATTAGGGCAAGAAAGACAGGCAAAACACAGGCAAATTGGGTTGTGTCAAGAACATTCATTTAGGATTTTAAAGCACCAGTTACTTAAAAATATATGTATTTATACAAATTCAATTACTTTACCAAGCGATGCTATGGGATAGATAGTACTTTAAGAAATTTTATTCTAAATAGATTCCAGTAGGAAACTAAATGACTGAAATGATAAACCCTACTCTGTGTAACTGCTAAACTAATTTGTAGTATATTTACTGCTCCATTTACCTTTGCTGAATCCTTCGCTTTACCTCCATTCTTAGGTGCTTTGGAGCTGGAAGCAGCCTTCTTGCACTTATCCTAAAGCAAAGGAAATGTAATGAGGCTATGGCTATCACCAATCCATCTAGCAATTAACTAGGCTGCAGTTAAATTAATCCAGACCATTCTGAGATCTCCAATTTAATTAAATAATGGAAGACTTTGGGTGTTTTTTCATGATTTTTTTTGAACAACGGTGTCAGAGAATTGTTTAAAGCAGGAGGGAAAAAGGATAGAAATAAGGAGGGAAATGTGGGTGACATTGATGCTCTAATTCCCGTGGTGCCTGACTCACCCAGACCTTATTTTGCTAATCAAAACAGAGCTTGTCAATAGATAATAAATGTCGGCAAGGGTGTGGAGAAAAGGGAACCCTAGTACACTGTCAGTGGGGATGTAGATTGGTATGGGCTATGATGGAAAACAGTATATAGGTTCCTAAAGAAATTAAAAGTAGAACTACTGTATGACCCAGTAATCCCTCTTCTGGGTATATACCCAAAGGAGATGAAATTATCACCTTATAAAGATATCTGCACTCCCATATTCACTGCAACATTACTCACAATAGCCAAGATATGGAACAACCTAGTTGTTGATGAATGGATAAAGAAAATGTCATGTATATATACATAATGGAATATTATTCAGCCTTAAAAAACGATATCCTACTATTTGTCACAACATGGATGGACCTGGAAGACCTTATACTAGATGATATAAGCCAGACACAGAAAGAAAAGTGATTTCACTTATATGTAGAATATATATAAAAGAAAAAGCTCAAAAACACAGAGAATAAAACATGGCGACCATGGTAGGGAACAGGAGGAGGAAACAGAGATATAGGTCAAAGGATACAAAATAGCAGATATGCAGAATGAACAAGTGTAGAGAGTTAATGTATAACATGAGGACTAAGGTTAATAAAATTGTATTAGGGATTTTGTTAACTAAGTAGATTTTAGCTGCTTTTGTCACAAAAAGTAGTTGTGTGAGAATGATAGATATGTAAATCTGCTTCCCTACAGTAACCATTTTATTATTTCTATGCATCCCAAACTACCATGTTGTAAACCTCAAATATACACAATAAAATGTATTTAAAAAACAAAATAGAGCTTGTCTCGATCAGGACTGGCTTTTGTGTACCAAAAGGCAAAAAAAAAAAAACAAAAAAAAACCCTGTTTTCAGTGTTATGGGAGAGAAATGAACAATGGGAAACAACCGAGGAAAGCTGGAGCAGGTTACGTATAAAAATAAAGTCCATTCACCAAAAAAGGCATTACTTACGAGTTACCAGGGGTGAGAGATAGGATGCTGAAGTGGTCTAGAAATTAAGCTACCCAGTATGGAAGGGCTGACAATTCAGTGATCGAGAGCAGTGCCTTAGAACAGCCAAAACAATAGCAAACTGAGATCTGCAGAATTAACTCTCCTGAAAATAACAAGGAGGTACTCATTTCACGTTTCCTTCTATTTGATTTACAAGAGGGTGTAGCTTGAGGGAAAATGCCTCACACTTGTTGAATTACACAGTTGTTTCTCATTCACTTTTAATCACGTTTTGAGCACCTGCTAAGTACCAGGCATTTTGCTAATGAGGAGCACAGAGGTAAAAGACACATCACTACTGTATGAAATGCGTAGCTCAGTGGTGTGATACACAAGCACAGAGAGGTAACAGAGAGCAAGGAGGGCATGGAAGAGAGGCCTCTAACTTTGGACTGGGAAGGGAGAAGATGTAAGACAAGAAAGTCTTCCCTAAGGAGCTGATGCTTGAGCTGTGCCCTGAGGAATGAAGAGTAGACCAGTTGGGCTAAGCAGACAGAAAAGGGGAGGAAGCTCCAGAGAGCAAATGAGCATGAGAGTGCCTGATGTAGTTAGGGCCTGCTCTCACTTTAAATGAACACAGACATAGCATTATTGTGGCACAACCATATAGTGTGGAGATAAAAAATGGTGGCTATGGAAATTACAAAGTAGCAGTTAAGAAATAACGTTAAGCAGTGTTTTATAAGTGGACTGTAAGTATAATTATGTAAAATATACATATAGAAAAAAAAGGAAATCCACAAAATAATACTGTTTTGGGGGCAGTGGAATTATAGGCATTTTTTCTTTTCTTCATTTTCAGGTTCTCTATATCATCGTTTGATTCATTCTACAGTTTAAAAATTGTAAGGGCCAGGCGCAGTGGCTCAAGCCTGTAATCCCAGTACTTTGGGAGGTAGAGGTTGGCAGATCACTTGAGCCCAAGAGCTCGAGACCAGCCTGGCAACATGGCGAAACCCCCTCTCTACAAAAAAATACAAAAATTAGCTGGGTGTGGTGGTGCACGCCTGTAGTCCCAGCTACTCAGGAAGCTAAGGTAGGAGGATTGCTTGAGCCCAGGAAGCAGAGATCGCAGTAAGCTGAGATCACACCACTGCACTCCAGCCTGGGCTATAGAGTAAGACCCTGTTACAAGACAGACCGATAGATAGATCAATCAATAAATAAAACTTATATGTATGTACACATACACACACACACATTTCAAAGAGTGAAATGTGAAAAAGCACAGTACCTTTGCTGTGTTCTGTGAGGTTTCTGTAGTGGAGGGACAGCTGTCCAGATCTCCTGAGAGAGCATCAATGGGGTCTTGGTCATCTGCAGGTTTCTGAGATATGAGTAGAAATAACCATCAGTGAAGAAGCAGAAGGCAAAATGCAATATGGGGTCTTTTCCCACATCACTTACAAATAAAAGATGTTTCTATAAGAAAAAAACTGACTGACATTCTTTATTAATAATAATGTATTGTAAAGGAGATAGAAAAACAAGAAAATCTTGATGGCTTTTTTTCCATCTACTCTTTAATACACGTTGCTTAGCATTCTCTGAGCCTCAGTTTTCCACCCTAAAGGGCTTTTGTGAAGACTAAAAGAGAGGTAAACAATAGTCACACACATTTATATGCATGCTTTGACAAAGTACCAGGCACAGAGTAGGCATTCAATATGTTTTAGTTTTCTAAAATGCCAAATACCCCTATGGCTAGAATAAAACAAAATTTAATGGAAATATGTTCCTATGGTCTTTACCTTTGAATCCTCTGATTTCTTTGTAGGTGGCTTCACTGGTTTGTCCTTAAAAAGAAGGCAGACTATTGGTTAAGCATAGATATCTGTAAAGGTTTACTTAGGTTTAGGCAGTAGAGAATCTATTGTCCCATGACTTGACTTGGATAAAATGGAGTGTAGACTTGCAATAACTAATAAACCTGAGTCCCCACTATTTCTTTACCAGCTTTGCTTAATTACTATTCTTTATAATCATTTGTGTAGACTTGGGGGAAACATATGAAAGGGTCTGGTCCTGGAAGTGAAAGAAACACTATTTGTGAACCTTCTGCCATCATGTATTATCACTTATAATTCTACTTAAAGTGTTATAAAAAGTTACTTGTGTTTTTACATCTTATTTACTATAGCTTATTACACTTAGAAAGTCATCAGGGATTTTTCCCAGCTTCAAAGGCAAGGGCCTTAAATAATAAATTTCCCAAAGAACGCAAGCAGGGTGAGTTGGTACTATCAAAGTGGGAAGGGCTCTAGCAGGATGTGGAATTGCTATCTTGAGGAATACTGAATGCAAGCAAGGAGAATTGGTTCCTGTAATAGGAGACCCTGAAGCTGACACTGCTATTTAAACCAGGAATCATCTTACTCACCAAAATGAATCAGTACAGAAAGAGAAGGGTGTAATTTGGTCTTCCTTGTTTCTTATTTGATGAACAGGCATGGAAAGGGTTATTTATCATCTAATTGACTCAAAAATTAGAAACTAAATAGGAAAGAGGTGGGGGACAGGCCTGGTTGACCTACGAAAGACTGGCCTCTATCATGTGGGAGACAAAAGGCCAGGGACTTTTTGGCAGAGAAACAGGATTTGTGATTGGGAATATTGCTTTGCCTGTCTTCACTTGCAATAGTGCTGATGATGATGCAGGAGAAGATAGGGAGACCCCAGGTCTTGGAGCTGCCTTATTAATTTTCCCTATTAATTATCCCAACACCAGCTCCTTTTCTGCTCTCCTTAAACAGAGATTGCTTCCTTTGAAATCCTATCACCTTAGTCATTTTTATAGCCTTTTCTTGAAAGAGAGAATCACATTTCTTGGATCACTCTTAAGCATCTGTGAGTGACTGAACAGTATTCAACCCTGTTTATACCACAGAGCCTAGCATATGCATATATAATGATATTGGCTCAAATAAATATTTACCTGCTGGTCGGCTTTGGTGACACATTAGAAGCAGTCAGTCATGGTATGTTTTATGCTATTGTAAAGGAGTATAGCTACTATTTATTTCAAGGGAGGTTTGAATTTAAAGATCTTGGTAGCATAAATCCGATCTAGCAATTTGCCTCAGTTTACCTGTCCATTATCATCCAGGAGATGTCTGTATTCAGGTGGGATAGTGTCATCTCTTTCTCCAAGCTTGTCTCTATGTTCAGCTTTAGCTTTTTCCTATATCAACAGTGAGCAGATAGAATTAATATTCATTTCCTCTTTCACTTAGAAAATACATTGTCAAATGCAGCATTCAGCATTTGTTGTACTTTCATGAGGCAAAACATATGGTCTGTTTTTATTTTTTAAGCAACAGAACAACACAGATGACTTCAACAGACTGCTAGTTGGCTAAAATATAAAATCCCGTATGCTTCTGTATGTCAATTCATATCTGTGAATTTTCTAGCTATATTTTAAATGGAAATAAATGATTAAATAATTATCTTCAGAAACCATGTTAGGAGATTAGAACCCAAAGGTATAAAATATCTTTTTCTTTTTTTCTGTATGGTTTCACTTTTCTAATACAAAATCAGGCCACTGTACCTTGCCATTTAGAGAGGTCACATTTACAACTTTGCCTATTTATAAGCAACCCTGAAGGACAACAGCTAATTTGAATGGATGTGCTGACTGCTTGTGTTGCATGGGAGGAAGCCATGCATACCCCACACCTACCTCCCAGAATCCCCTCAGGGAGGCTCCGCTGGTATCTCTGTCACAGATTCAGTGACCTTCCCCTCCAGTGGAGTACAGATTGATTTTTCTATTTCATTATAATTTCATGTTTAAAATATAGGTTAAGTTCACATCAATATTCCTATGACAATGACAGAGTCAAGACCCAAGGATTAAAATTTCACTATTAGTGAATTTTTTTTTTTTTTTTTTTTTTTTTTACCTTTACTTTATCTTCCATTGGTTTGTTCTCATCTGGGTCAGGCTGCCTTTGTCCTAGACTGTCAGAGAGTTTATCCAAGGCATCATCGAGGTCTTTGTCACTCTGCTGAAAAGTAAATAATGCTGAATTAGTCACTCATTAGCCAAACTGTATCAGAGGCAAATTAGCCCAGGAACTCCATCTTTTCAGGAGGGAACCTGTTTCCTTGGAAGAAAAAGACATCTGGGGCTGGGGCAGGGGAGTAGACAGGGTCAGAGAAGAGAAGCCTAGGAATGGAAGATCAGGAGAAGAGCGGCAGTATCTGTCACCCTACTGGGGTTGGAGGGCCAGCACCTTCCACCCAACCCTGCCCATCTCTTGGTGAAGATCCCACCAGGTTAAGGAGGTCTTAGTGGCAGCCTCAGGAGCCATATCCAGTGGGTGACCTGGAGGTCACATAAAGGGTCCAAAAGCAAATGAACCAATCATGTGTGCCTTTCATTTAGAAATTAAACACCATTAGAAAACTGGATATGAAAACAAACATCTACTAATGTTGTCAGATGGTTAGGAAGCAAGATTCTGCAACTATAGAGGGTAAGTGTTTCTTTGGTTCTGTGGGTCCTCTCTAAAACTCTAAGATCTTGAGGGGTGCATTTCAGAAAGTGCAGCGTGACCCGCAGTTTGTGGGAAGCCATGGAGCTCGGCACTGCCATCCTAATACTTCCTAAAGCACAAAACCCCAGAGACAATCTGGGGTCAGGAGAGTGGAAGGGGCTTGTCTGCCACACTGGTGATGAGTGCCCTGAAAGACTTCAGAGAATTTCTGAACTGGTGGGGAAACCTCTCTTTTCATCTTCAGGAAGCTCATGGAAGTGAAATTGCAGAAATGGGAGCTGGTATTCTAGAGGAAAAAAATTATGGACAACAATATCACTGTAACTAAGATAGCTTATTTCCTCTAACATTTATTTACTGTATGATTCAGGCAGCTTATTTAACCCCTTTCAGCTTCAGTTTCCTTGGCCGTGAAATGTGAATAATAGTAGTACTTATACTCCTAAGTTGCTGAGAAAAGTAAATGATTGAAAAGGCATTTAAAACAATACTAGTTGTATGTTAGGACCCAACAAATGGTAAATTATTATTAGTATTATTATAGCAAAATCCATATTTTTCAACACATTGCATTCAAAATTCCACCTCTAAATGAATTCAATTAAAATGTGTTTAATATCTACATTGTATAAGACACCATGCTGAACTCTGTCCAATACTGTATAGAACTTTCCAGTTGATTTTCAAAATGTTTTCACATACACTATCCAGTTTTATTTGATGCCCACAATGGTTCTCAGTGAACTAAGCAGGCTTTTTTTTTTTGAGGTAAAAAGCTCAGGGAAGCTAAGTCAGTTGCTTGAAAACAAATTGTTAAAAAGTGAATAGAACCCAGGTCTTGGGACTGATAAAGCTTTTCCCTGTCATGCTTAGTCACATCCATGATCTTCTATTTTCTTTGAAGCAGTTTTCCTGTTGGAGTGATTTTATTACACAGATCTTTGAAATCATGTCTTCAAATGCTTTCAGTGTATGTAACACTGTTAGTAACAATCTAATAATCACAGCAAAGAAAGCTCCCGTGAATTATCATGGTTTATTTGACTCTTCGATTTCCTAATATTTTTATCTAAATAAAGCTTTATACCTTGTTTTAGTGACATCTTCAAATAAAATGTTAACTAAAAACAAGCCTCTCTGATGGGAAATGTGATCAGAGAAGTGTCATTGTAAAACCTACTTCTTAAAGGCAAAAAAGTTTTTGATTGCAAATGTTTACTGATAGCCTTCATCAGGGCAGAATCTCTGGCCTGAATATTAAGAACTGAAGTGTAAACGGCAGCCTAGGCTATTAATGATTCTTCCTTTCTGTTGCATGGGGACTTTCTTCATTGTGGGTGTGTTTACATACACACATGCACATGCACATGCACATACACACACACGGGGCATTTTACTGGTTTTAAGTGCTTTATTATAATCCAGGATTATAGCTGCTAATGGTAGAGCTGCCCGGGGCCAGGTCTGGGCTTTGTCATTTGTGCCTCTGGATATTTTCAG|ATGATCCTGAAGCTGACGCAACAGGATGAAAATCCATCAGAATCTCAGACTACAGCACTAAATATGCTTTGATGCTACATCAAACGGAATGGAAGCATAGCTGACTTCGCTAAAGTTACTTCATCTCCATCTAGCAAATGAGGCACTGTTCTCAACCAAAGGAGATGGGGATCTGGTTTAGGGCAATCCCTTTATAATTTGATGTGCTGTGGTCTCCTTGGTAATGTATAATTTGGTATTGCACAGGTGATTAGTCAAGGAAGTCTGGAAAAGCTTTGGTCCCACAGCCTTGCCTCACAGCATGTAAATAATTAAAACAATATTGATGCTGAGGTTCTTCTACTGCTAGTATGAAAGTGACAAATTTTTACTGGTGTGAATTGGGAAGAAAACAATGCTATTCCATGACGTTTGTAAAATGTTTGTAAAAGCTCAAACATGACGATTCCATAAAATAAACTTGAGGTTAAATAATGGGTAGTAAATTATAGAATGTAT|AAGAAAAAATATAAAGGAGAAAATCAATTATCAGGAAAGCTAAAGAACTTTTCAAATCTAGTAATTTGAATATAGACACAATGCACTTTATTGCACTTTCAATTCTTATAAAGCAACAATAATATTAAGGTCCTTGACTATGTGTACAATGTTTTCACATATATAGTTTCATTTAATCATTTCAAAGTTAATCTCTGCCATCTCGCTAAATCATCAGTCTCGGCTCTTCTGAAATAGAAGGTGCCTGATCTTCCTAATAATTCTGCCTATTTTCATTTGCTTTAAACAGGCGCCCTATTTTCTTTCTAGTTGTGGCTGCGCAAAAACATTTATCTCCCAAATAAGATGTGCTGCTTACCGAGGTATCACGGGGTGGGGCTCCAGCTTGGGTCGTTGAAGCTGGGGTTTGGGAAACCACTTCAGAGATGGCAGCAGCAAGTTTAGCATCTTCAAATTTCTTTTATTGAAAAAAATTTTATTAGTAACATGTTGTATATAAAATTATGAGCACAATGCCATCACTTAACTATAACTCTTAAAGATAGCTTAATGACTGTTTATTCTCTTGACCAAATAGACTCATAATAACATATAATTTTAAAAGAAATTTAAATTCTTTCTTCTCTATTGTATTATTTTATACAATTTGCTATTTCTATTTCCTTCTCATATTGATTATTCTAAATACTATGCAATAATATAACTTAGAGTTCCACGGTTTGTTTACACATTTCCTGTTGTACATTTAGGTTATTCAAAGTTTTCAGCTCTTTTAAAATTGCTCTGAATAAGTTCTAGTGAGTGAGTTATGGTGCTGGCTATATTTTGCTAAACTGCCCTCTCAAATGTTGCTAGGAATTCATACTGCGAAAAGCAATGAATAAGCATGCCTGTTTTCCCATGGCCTTGCTTGCCAGAATTTGACTTTTATTATGATAATCAGTGTAAAATGATATACTACTATTGCTTGTATATTGTGGTATACGGTGTCAGGTTTCAGGGTTTTTTTTCAACGTTAAATATTCTAGAAACTTTCTGAAATAATTTCTGTTTAAAAATATTGAATATTTGCTTCATTTCAAATACTCCCTTTTGACAAAAAAACTTAGGTATAACTGTTGATGAAAAACCAGAAAAAAGTCCAGAACTCTTTGGTGACTCCAACTATGGATAGCTTATTTTGAAAAAGGAGAATTGCAAATTTTACCAAAAGATGGAGAAAAGCACATTAAAAAGATACCAACATTCAGAAATTCATTTCAGCATGTTATTATTGGAAATTATTTAAACTAATTTAGATAACTATAAGATACTTATTGTCCATTTATACCCTGTAAAGCCGTTTTAGAATGTAATATTTTAGGTAATCCAAAATGTACTAAATTAAATTCATTTTTAGTTATGAGAAATCTTTGCTTATATGACAAATGAAAAGAATAACAAGTTGTCAAATGAAAAGAATGACATTGAAACATTTGTATTGTCTCTTCTTAAACTATCTTATTGACTTATTATTTAAGCCTTTTAATACTAAGTATGAAACAACCTATGGTCTGGAAATTTGTATCGCAAAGCTATATGTGCATATGTTATTTAATTCATCTAATGCTACACAAAAGCATAAAATAATGATTTTTCACTCTCTTTAAAAATACTAAATCATTTATGTCCATTTCTCAATTTTTTCATTGATCTATGCTTTGAGTTTGCTTTCTCAACATTATTGTATTTTCCACTTATTATTACTGTATAACATATGCTAGTGTTTAGTTGGATTAATCTTACCTAAAAGTACTGAAAAATGCTTTTTAGTACTTTTTCATATTTTATACATTTATTTTCCGAATGTATCATTGAATAATTTTATTGAGTTATAAAAGTATCTTATTGCTATTTAATAAAAAATTAACACATAAAATGACTTGAATTGTCATCATTCTTTTTAAGATATTTAGTTAAACTGACTTAATGTATGGCCTTCAATTTTTTTGTGTCCTTATTTTTCTGATCATTTCTCCTTTTATAGTTTACATTAAGTCTGATCTCATATTAATTACATTTTCTCATCTGTTGTTACTAATAAACATGGCATAATGTTACTTACAAATGTATTATCTACAAGTAGTGCTATCCACAAATATATTCAAATGTTCCCTTTTAATGTTTGTCATTTTTTTCATGTGTTGTTAATGATTCTTCCATGTG'

In [11]:
def find_occurrences(s, ch):
    return [i for i, letter in enumerate(s) if letter == ch]

def replace_char(s, i, c):
    return s[:i] + c + s[i+1:]

find_occurrences('AATAT', 'T')

[2, 4]

In [12]:
seqs_to_align = [
    str_to_fasta_seqrecord(
        f'{sample}|{fragment}|{i}',
        sequence
    )
    for sample, fragment_info
    in samples.items()
    for fragment, sequences
    in fragment_info.items()
    for i, sequence
    in enumerate(sequences)
]



In [13]:
from tgsts.sequtils.fastio import write_fasta
from tgsts.sequtils import str_to_fasta_seqrecord
from tgsts.sequtils import clean_sequence

write_fasta(
    'erap_seqs.fasta',
    seqs_to_align,
    force=True
)

write_fasta(
    'erap_reference.fasta',
    str_to_fasta_seqrecord('ref', clean_sequence(erap_reference)),
    force=True
)



In [14]:
#Run minimap
import subprocess

subprocess.run(
    [
        "minimap2",
        "-a",
        "erap_reference.fasta",
        "erap_seqs.fasta"
    ],
    stdout=open("erap_frags_aligned.sam", "w"),
    check=True
)


[M::mm_idx_gen::0.003*1.98] collected minimizers
[M::mm_idx_gen::0.006*2.34] sorted minimizers
[M::main::0.006*2.34] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.007*2.24] mid_occ = 43
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.007*2.18] distinct minimizers: 8894 (98.97% are singletons); average occurrences: 1.025; average spacing: 5.278; total length: 48105
[M::worker_pipeline::1.713*2.93] mapped 660 sequences
[M::main] Version: 2.22-r1101
[M::main] CMD: minimap2 -a erap_reference.fasta erap_seqs.fasta
[M::main] Real time: 1.720 sec; CPU: 5.025 sec; Peak RSS: 0.175 GB


CompletedProcess(args=['minimap2', '-a', 'erap_reference.fasta', 'erap_seqs.fasta'], returncode=0)

In [15]:
#Load aligned

from utils import build_full_length_alignments

ref_aln, subread_strings = build_full_length_alignments('erap_frags_aligned.sam', 'erap_reference.fasta')

piped_ref_aln, piped_read_alns_strs = add_pipes(erap_reference, ref_aln, subread_strings)



In [16]:
class Alignment:
    def __init__(
        self,
        ref: str,
        ali: str
    ):
        self.ref = ref
        self.ali = ali

        self.rrs = identify_rrs_ali(ali)
        self.seq_rr_masked = mask_rrs(clean_sequence(ali))




In [17]:
#Generate alignments
def generate_alignment(key, ali):
    return Alignment(piped_ref_aln, ali)

args = [
    (k, ali)
    for k, ali
    in piped_read_alns_strs.items()
]

res = run_concurrently(
    generate_alignment,
    args,
    mode='process',
    batch_size=10
)


piped_read_alns = {
    tuple(*[r.args[0].split('|')]): r.result
    for r
    in res.successful
}

Processing: 100%|██████████████████████████████████████████| 660/660 [01:44<00:00,  6.34it/s]


In [31]:
class OverlapError(Exception):
    def __init__(
        self,
        message,
        sample,
        frags_left,
        frags_right,
        overlaps
    ):
        super().__init__(message)
        self.message = message
        self.sample = sample
        self.frags_left = frags_left
        self.frags_right = frags_right
        self.overlaps = overlaps

        self.write_output()

    def write_output(self):
        out_path = f'./out/fails/{self.sample}.txt'

        if os.path.exists(out_path):
            os.remove(out_path)

        with open(out_path, 'w') as f:
            f.write(self.sample+ '\n')
            f.write(self.message + '\n\n')

        if_left = self.frags_left[0].fragment
        if_right = self.frags_right[0].fragment

        overlap_lines = []
        overlap_lines.append(f'         \tCoding\tNonCod\tRR    \tKmer')
        for (i, j), overlap in self.overlaps.items():
            overlap_lines.append('\t'.join([
                f'F{if_left}:{i}/F{if_right}:{j}',
                *[
                    str(int(_)).ljust(6)
                    for _
                    in overlap.match_tuple
                ]
            ]))

        with open(out_path, 'a') as f:
            f.write('\n')
            f.write('\n'.join(overlap_lines) + '\n\n')
                    

        ali_lines = []
        msa = MSA(*self.frags_left, *self.frags_right)

        #Get overlapping boundary idx
        i_start = max([
            len(re.match(r'[-|]*', ali).group())
            for ali
            in msa.alis 
        ])
        i_end = len(msa.ref) - max([
            len(re.match(r'[-|]*', ali[::-1]).group())
            for ali
            in msa.alis 
        ])

        ref = msa.ref[i_start:i_end]
        alis = [ref] + [ali[i_start:i_end] for ali in msa.alis]

        #Lose all columns which are just -s
        ref, *(alis) = [
            ''.join(ali)
            for ali
            in zip(*[
                bases
                for bases
                in zip(*alis)
                if set(bases) != {'-'}
            ])
        ]
        
        labels = [
            f'F{if_left}:1', 
            f'F{if_left}:2', 
            f'F{if_right}:1', 
            f'F{if_right}:2', 
        ]
        batch_width = 80

        for i in range(0, len(ref), batch_width):
            s = slice(i,i+batch_width)

            ref_snippet = ref[s]
            alis_snippets = [ali[s] for ali in alis]

            label_ljust = 20
            
            mm_line = ''.join([' '] * label_ljust) + ''.join([
                'v' if len(set(bases)) != 1 else ' '
                for bases
                in zip(*alis_snippets)
            ])

            ali_lines.append(mm_line)
            ali_lines.append('ref'.ljust(label_ljust) + ref_snippet)
            ali_lines.extend([
                label.ljust(label_ljust) + ali
                for label, ali
                in zip(labels, alis_snippets)
            ])

            

        with open(out_path, 'a') as f:
            f.write('\n'.join(ali_lines))
        
class AssemblyError(Exception):
    pass

In [50]:
class Fragment:
    def __init__(
        self, 
        seq: str,
        ali: Alignment,
        fragment: int,
    ):
        self.seq = seq
        self.ali = ali
        self.fragment = fragment

    def merge_aln(self, other: 'Fragment'):
        return MSA(self, other)

In [21]:
class Overlap:
    def __init__(
        self,
        frag_left: Fragment,
        frag_right: Fragment,
    ):
        self.frag_left = frag_left
        self.frag_right = frag_right

        self.match_tuple = self.assess_overlap()

    def __bool__(self):
        return self.match_tuple[0] and self.match_tuple[2]

    def __str__(self):
        return str(self.match_tuple)

    def __repr__(self):
        return f'Overlap{self.match_tuple}'

    def assess_overlap(self):
        overlap_aln = self.frag_left.merge_aln(self.frag_right)

        #overlap_aln.print()

        ref_ali = overlap_aln.ref
        alis = overlap_aln.alis

        #Get overlapping boundary idx
        i_start = max([
            len(re.match(r'[-|]*', ali).group())
            for ali
            in alis
        ])
        i_end = len(ref_ali) - max([
            len(re.match(r'[-|]*', ali[::-1]).group())
            for ali
            in alis
        ])

        #Get union of rr bases
        rrs1, rrs2 = overlap_aln.rrs
        rr_bases_1 = {
            i
            for rr
            in rrs1
            for i
            in range(rr['ali_start'], rr['ali_end'])
        }
        rr_bases_2 = {
            i
            for rr
            in rrs2
            for i
            in range(rr['ali_start'], rr['ali_end'])
        }
        rr_bases_both = rr_bases_1 | rr_bases_2

        #Set to false when mismatch found
        coding_match, noncoding_match, rr_match = True, True, True
        
        for i, (ref_base, *(ali_bases)) in enumerate(zip(ref_ali, zip(*alis))):
            if not i_start <= i < i_end:
                continue
            if len(set(*ali_bases)) == 1:
                continue
            #Mismtch found
            #print(i, ref_base, ali_bases, overlap_aln.get_feature(i), end='\t')
            feature = overlap_aln.get_feature(i)
            if feature == 'coding':
                coding_match = False
            elif feature == 'noncoding':
                noncoding_match = False
            else:
                raise ValueError('unexpected feature found?')

        left_tail_masked, right_head_masked = trim_intersection(
            self.frag_left.ali.seq_rr_masked,
            self.frag_right.ali.seq_rr_masked,
        )
        if len({
            str(left_tail_masked.seq),
            str(right_head_masked.seq)
        }) != 1:
            rr_match = False

        kmer_similarity = - calculate_kmer_distance(
            clean_sequence(alis[0][i_start:i_end]),
            clean_sequence(alis[1][i_start:i_end]),
            7
        )

        return coding_match, noncoding_match, rr_match, kmer_similarity

In [35]:

class MSA:
    def __init__(
        self,
        *fragments: Fragment
    ):

        if len({
            f.ali.ref
            for f
            in fragments
        }) != 1:
            raise ValueError('ref must be identical')

        self.fragments = fragments
    
        self.ref = fragments[0].ali.ref
        self.alis = [f.ali.ali for f in fragments]
        self.rrs = [f.ali.rrs for f in fragments]

    @classmethod
    def from_fragments(cls, *fragments: Fragment):
        return cls.from_alis(*[
            frag.ali
            for frag
            in fragments
        ])
            

    def get_feature(self, i):
        upstream = self.ref[:i]
        pipe_count = upstream.count('|')
        if pipe_count % 2 == 0:
            return 'noncoding'
        else:
            return 'coding'

    def consolidate(self):

        #Sort by i_fragment
        msa_sorted = MSA(sorted(
            self.fragments,
            key = lambda x: x.fragment
        ))

        #Counts of each fragments
        fragment_counts = Counter([
            f.fragment
            for f
            in msa_sorted.fragments
        ])

        #Can only align if there is max 1 seq per fragment
        #Frags must also be contiguous
        i_fragment_prev = None
        for i_fragment, count in fragment_counts.items():
            if count > 1:
                raise ValueError(
                    'Cannot consolidate unless only 1 poss seq per fragment'
                )
            if (
                i_fragment_prev 
                and (i_fragment - i_fragment_prev) > 1
            ):
                raise ValueError(
                    'Fragments must be contiguous and overlapping'
                )
            i_fragment_prev = i_fragment
                    
        #Always choose fragment with higher read count

        #Sort fragment indexes, higher read count first
        fragment_i_priority = [
            fragment.fragment
            for fragment
            in sorted(
                msa_sorted.fragments,
                key = lambda f: f.read_count,
                reverse=True
            )
        ]

    def print(self):
        ali_len = len(self.ref)
        for i in range(0, ali_len, 80):
            print(
                self.ref[i:i+80],
                *[
                    ali[i:i+80]
                    for ali
                    in self.alis
                ],
                sep='\n',
                end='\n\n'
            )


In [28]:
class FragmentPair(list):
    def __init__(
        self,
        frag1: Fragment,
        frag2: Fragment,
        fragment: int
    ):
        self.frag1 = frag1
        self.frag2 = frag2
        self.fragment = fragment

        super().__init__([frag1, frag2])

    def product(self, other: 'FragmentPair', enum=False):
        if not enumerate:
            for a, b in product(self, other):
                yield a, b 
        else:
            for (i, a), (j, b) in product(
                enumerate(self), enumerate(other)
            ):
                yield (i, a), (j, b)

class Tiling:
    def __init__(
        self,
        sample: str,
        *fragment_pairs: FragmentPair
    ):
        self.sample = sample
        self.fragment_pairs = fragment_pairs
        self.assemble()

    @classmethod
    def try_init(
        cls,
        *args,
        **kwargs
    ):
        try:
            return cls(*args, **kwargs)
        except (OverlapError, AssemblyError) as e:
            print(f'{type(e)}: {str(e)}', end='\n\n')
            return None

    def assemble(self):

        possible_assemblies = PossibleAssemblies()
        
        for frags_left, frags_right in zip(
            self.fragment_pairs[:-1],
            self.fragment_pairs[1:],
        ):
            i_f_left: int = frags_left.fragment 
            i_f_right: int = frags_right.fragment 
            overlaps = {}
            for (i, frag_left), (j, frag_right) in frags_left.product(
                frags_right, 
                enum=True
            ):
                overlap = Overlap(frag_left, frag_right)
                print(f'- A{i}/A{j}', overlap.match_tuple, bool(overlap))
                overlaps[(i,j)] = overlap

            acceptable_overlaps = {
                k:v
                for k, v
                in overlaps.items()
                if v
            }

            if len(acceptable_overlaps) == 0:
                raise OverlapError(
                    f'No overlaps found between F{i_f_left} and F{i_f_right}',
                    self.sample,
                    frags_left,
                    frags_right,
                    overlaps
                )
            if {_[0] for _ in acceptable_overlaps.keys()} != {0, 1}:
                raise OverlapError(
                    f'F{i_f_left} could not overlap both frags to F{i_f_right}',
                    self.sample,
                    frags_left,
                    frags_right,
                    overlaps
                )
            if {_[1] for _ in acceptable_overlaps.keys()} != {0, 1}:
                raise OverlapError(
                    f'F{i_f_left} could not overlap to both F{i_f_right} frags',
                    self.sample,
                    frags_left,
                    frags_right,
                    overlaps
                )

            #If there are three possible overlaps, one is always
            #removeable as one fragment has an unambiguous choicfe
            if len(acceptable_overlaps) == 3:
                f_left_counts = {0: [], 1:[]}
                for _i, _j in acceptable_overlaps.keys():
                    f_left_counts[_i].append(_j)
                #Find the fragment which has two options
                greedy_frag = 0 if len(f_left_counts[0]) == 2 else 1
                modest_frag = int(not greedy_frag)

                del acceptable_overlaps[(
                    greedy_frag, 
                    f_left_counts[modest_frag][0]
                )]
            #If there are 2 possible overlaps, perfect and unambig
            #If there are 4 ambigs, this is am ambiguous overlap.
            #This may or may not be a problem, depending on whether there
            #are polymorphisms both upstream and downstream that become
            #ambiguous due to the overlap ambiguity in this frag
            #Either way, here we add all possibilities
            

            possible_assemblies.update(list(acceptable_overlaps.values()))
            
            print()
        print(len(possible_assemblies), 'poss assemblies')

        assemblies = possible_assemblies.get_final_assemblies()
            
class OverlapLinkedList:
    def __init__(
        self,
        head: Overlap,
        tail: 'OverlapLinkedList' = None
    ):
        self.head = head
        self.tail = tail

    def update(
        self,
        new_head: Overlap
    ) -> 'OverlapLinkedList':
        if self.head.frag_right is not new_head.frag_left:
            raise ValueError('New head doesnt match list')
        return OverlapLinkedList(
            new_head,
            self
        )

    def __iter__(self) -> list[Overlap]:
        '''
        return list of overlaps corresponding to the tree 
        '''
        
        frag_list = [self.head]
        
        if self.tail is not None:
            frag_list.extend(list(self.tail)[::-1])
            
        return iter(frag_list[::-1])

class PossibleAssemblies(list):
    def __init__(self):
        self.overlap_lls = []
        super().__init__(self.overlap_lls)

    def update(self, new_overlaps: list[Overlap]):

        #If no present linked lists, create them
        if not self.overlap_lls:
            for overlap in new_overlaps:
                self.overlap_lls.append(OverlapLinkedList(overlap))
            super().__init__(self.overlap_lls)
        #Else, assign them
        else:
            new_overlap_lls = []
            for ll, overlap in product(self.overlap_lls, new_overlaps): 
                if ll.head.frag_right is overlap.frag_left:
                    new_overlap_lls.append(ll.update(overlap))

            self.overlap_lls = new_overlap_lls 
            super().__init__(self.overlap_lls)

    def get_final_assemblies(self):

        unique_masked_sequences = {}
        
        for ll in self.overlap_lls:
            overlaps = list(ll)

            frags = [
                overlaps[0].frag_left, 
                *[
                    overlap.frag_right
                    for overlap
                    in overlaps
                ]
            ]
            unique_masked_sequences.setdefault(tuple(
                f.ali.seq_rr_masked
                for f
                in frags
            ), []).append(overlaps)

        print(len(unique_masked_sequences), 'final assembly')
        print('\n\n')

        if len(unique_masked_sequences) > 2:
            raise AssemblyError(
                'More than 2 possible sequences - unable to tile across frags'
            )
        elif len(unique_masked_sequences) == 1:
            possibilities = next(iter(unique_masked_sequences.values()))
            chosen_frags1, chosen_frags2 = self.select_two_alleles(
                possibilities,
                possibilities
            )
        else:
            possibilities = [
                ps
                for ps
                in unique_masked_sequences.values()
            ]
            chosen_frags1, chosen_frags2 = self.select_two_alleles(
                *possibilities
            )

        return chosen_frags1, chosen_frags2

    def select_two_alleles(
        self,
        poss1: list[list[Overlap]], 
        poss2: list[list[Overlap]]
    ):

        acceptable_pair_scores = {}
        #Find best combination of overlaps that only consumes each frag once
        for overlaps1, overlaps2 in product(poss1, poss2):
            #Skip if any overlapping fragments
            fragment_ids_1 = {
                id(frag)
                for overlap
                in overlaps1
                for frag
                in (overlap.frag_left, overlap.frag_right)
            }
            fragment_ids_2 = {
                id(frag)
                for overlap
                in overlaps2
                for frag
                in (overlap.frag_left, overlap.frag_right)
            }
            if fragment_ids_1 & fragment_ids_2:
                continue
            acceptable_pair_scores[(tuple(overlaps1), tuple(overlaps2))] = sum([
                overlap.match_tuple[3] #Sort on kmer distance
                for overlaps
                in (overlaps1, overlaps2)
                for overlap
                in overlaps
            ])

        #Get selected overlaos
        overlaps1, overlaps2 = sorted(
            acceptable_pair_scores.items(),
            key=lambda x: x[1],
            reverse=True
        )[0][0]

        frags1 = [
            overlaps1[0].frag_left, 
            *[
                overlap.frag_right
                for overlap
                in overlaps1
            ]
        ]
        frags2 = [
            overlaps2[0].frag_left, 
            *[
                overlap.frag_right
                for overlap
                in overlaps2
            ]
        ]
        for frags in [frags1, frags2]:
            for frag in frags:
                print(id(frag), end='\t')
            print()
        #print(frags1)
        #print(frags2)
        print('\n\n\n')
        
        


        

        return frags1, frags2


                    

            
        
        
        


In [23]:
directory = "out/fails"

# Creates directory and intermediate folders if they don't exist
os.makedirs(directory, exist_ok=True)

In [26]:
sample_fragment_pairs = {}

for sample_id in samples:
    sample_fragment_pairs[sample_id] = []
    for i_fragment in samples[sample_id]:
        fragment_seqs: list[str] = samples[sample_id][i_fragment]
        fragments = [
            Fragment(
                sequence,
                piped_read_alns[(sample_id, str(i_fragment), str(i_seq))],
                i_fragment
            )
            for i_seq, sequence
            in enumerate(fragment_seqs)
        ]
        fragment_pair = FragmentPair(*fragments, i_fragment)
        sample_fragment_pairs[sample_id].append(fragment_pair)
    sample_fragment_pairs[sample_id].sort(
        key = lambda x: x.fragment
    )

In [32]:
sample_tilings = {
    sample_id: Tiling.try_init(
        sample_id,
        *fragment_pairs
    )
    for sample_id, fragment_pairs
    in sample_fragment_pairs.items()
    #if sample_id == 'AMALA'
}

- A0/A0 (True, True, True, 0) True
- A0/A1 (True, True, True, 0) True
- A1/A0 (True, True, True, 0) True
- A1/A1 (True, True, True, 0) True

- A0/A0 (True, True, True, 0) True
- A0/A1 (True, False, True, -4) True
- A1/A0 (True, False, True, -4) True
- A1/A1 (True, True, True, 0) True

- A0/A0 (True, True, True, 0) True
- A0/A1 (True, False, True, -1) True
- A1/A0 (True, False, True, -1) True
- A1/A1 (True, True, True, 0) True

- A0/A0 (True, False, False, -45) False
- A0/A1 (True, True, True, 0) True
- A1/A0 (True, True, True, 0) True
- A1/A1 (True, False, False, -45) False

16 poss assemblies
4 final assembly



<class '__main__.AssemblyError'>: More than 2 possible sequences - unable to tile across frags

- A0/A0 (True, True, True, 0) True
- A0/A1 (True, True, True, 0) True
- A1/A0 (True, True, True, 0) True
- A1/A1 (True, True, True, 0) True

- A0/A0 (True, True, True, 0) True
- A0/A1 (True, True, True, 0) True
- A1/A0 (True, True, True, 0) True
- A1/A1 (True, True, True, 0) True

-

In [None]:


sample_tilings = {
    sample: Tiling.try_init(
        sample,
        *sorted([
            FragmentPair(
                *[
                    Fragment(
                        sequence,
                        piped_read_alns[(sample, str(fragment), str(i))],
                        fragment
                    )
                    for i, sequence
                    in enumerate(samples[sample][fragment]) 
                ], 
                fragment
            )
            for fragment
            in samples[sample]
        ], key = lambda x: x.fragment)
    )
    for sample
    in samples
    if not print(sample)
    #if sample == 'AWELLS'
}



In [None]:
sample_tilings_succeeded = {sample: tiling for sample, tiling in sample_tilings.items() if tiling is not None}

len(sample_tilings), len(sample_tilings_succeeded)

In [None]:
sample_tilings