In [1]:
import pandas as pd
import numpy as np
import os
from uuid import uuid4
from itertools import product
import concurrent.futures
from datetime import datetime
import re
from collections import namedtuple, Counter
from copy import deepcopy

from tgsts.sequtils.trimming import trim_intersection
from utils import build_full_length_alignments, add_pipes
from tgsts.sequtils import str_to_fasta_seqrecord
from tgsts.sequtils.kmers import calculate_kmer_distance
from tgsts.sequtils.rra import identify_rrs_ali, mask_rrs
from tgsts.utils.parallel import run_concurrently
from tgsts.typeclasses import MismatchList
from tgsts.libs import ANTypingLibs
from tgsts.align import mm_profile_from_seqs
from tgsts.typeclasses import ExonDict
from tgsts.align import exonic_alignment


from sfat import Annotator 

In [2]:
path = './erap_tile_test_new.xlsx'

df = pd.read_excel(path).replace({np.nan: None})
df.head()


Unnamed: 0,Samples,Library ID,Method,Fragment,NumReads,Fragment Name,cds_mismatch_list,gDNA_mismatch_list,analysis_code,Sequence
0,AMAI,ERAP1_BCAv2_11,pbAA,1,103,1a,No_MM,"5utr:-296delACACACACACACAC>, intron1:2595delT>...",1,GAAAGTCCTGGGGGCCACCTCTAACCCACCTTCCTCCTCTACAGCA...
1,AMAI,ERAP1_BCAv2_11,pbAA,1,122,1a,No_MM,"5utr:-331delA>, 5utr:-296delACACACACACACACACAC...",1,GAAAGTCCTGGGGGCCACCTCTAACCCACCTTCCTCCTCTACAGCA...
2,AMAI,ERAP1_BCA_22,laa,2,179,2a,No_MM,"intron5:13741delA, intron11:20063C>A, intron13...",1,TTTGTGTTCCTCTAGTGCTGAGATTTCTTATTCTTGTGGAGGTATC...
3,AMAI,ERAP1_BCA_22,laa,2,213,2a,No_MM,"intron5:13741delA, intron13:22270insT, intron1...",1,TTTGTGTTCCTCTAGTGCTGAGATTTCTTATTCTTGTGGAGGTATC...
4,AMAI,ERAP1_BCA_28,laa,3,171,3f,exon15:2285C>G,"intron11:20063C>A, intron13:22270insT, intron1...",1,AATGAAACTATAGATAACAATTATTTCTATTATCTTTTCAGGGAGC...


In [3]:
df[df['Samples']=='AMALA']

Unnamed: 0,Samples,Library ID,Method,Fragment,NumReads,Fragment Name,cds_mismatch_list,gDNA_mismatch_list,analysis_code,Sequence
10,AMALA,ERAP1_BCA_12,laa,1,393,1a,No_MM,"5utr:-357delA, 5utr:-323insACAC, intron1:1293d...",2,GAAAGTCCTGGGGGCCACCTCTAACCCACCTTCCTCCTCTACAGCA...
11,AMALA,ERAP1_BCA_22,pbAA,2,77,2a,No_MM,intron11:20096delTG,2,TTTGTGTTCCTCTAGTGCTGAGATTTCTTATTCTTGTGGAGGTATC...
12,AMALA,ERAP1_BCA_28,pbAA,3,715,3a,No_MM,"intron11:20096delTG, intron14:24183insA, intro...",2,AATGAAACTATAGATAACAATTATTTCTATTATCTTTTCAGGGAGC...
13,AMALA,ERAP1_BCA_25_reload,pbAA,4,103,4a,No_MM,"intron14:24484insA, intron18:27923G>A, intron1...",2,GCTTGGCAAAATGTCCTGAAGTCTTGTTGCATAATTTGCTCTCAAA...
14,AMALA,ERAP1_BCA_26,pbAA,5,68,5a,No_MM,"intron19:33709A>C, intron19:35500delT, intron1...",2,TCAAGTCAGTTAATACCCTAAGAATTAGATTTTATTTCTTATTCTG...


In [4]:
#Fill rows
rows = []

prev_filled_row = None
for i, row in enumerate(df.to_dict(orient="records")):
    
    if row['Samples'] is not None:
        prev_filled_row = row
        
    for column, value in row.items():
        if 'Typing' not in column and value is None:
            row[column] = prev_filled_row[column]

        
    rows.append(row)
    

#Filter rows

#Get nonzero analysis codes and remove missing seqs
rows = [
    row 
    for row
    in rows
    if row['analysis_code'] != 0
    and row['Sequence'] is not None
]    

In [5]:
erap_reference = 'GTACAGTGGCCCTTGGTAGTGCAGGAAAGTCCTGGGGGCCACCTCTAACCCACCTTCCTCCTCTACAGCATCTCCCACTGTAGTCATTCTCTACCGAAGCCCCAGAAGGTGCGGCACTTTGCCACGACAGAGTACTGGGTTCATGTTTCTTTCCGAGGCGGGCCAAGAGCTCTCAGCCCACTGGCAGTGGCGAGATGACGGACACCCAGCGAGTCCAATGGGCGTCGAACGCGTCTAGGCTTGGTGGACTTGTCAGCGCCTGCCTGGCTTCGGTCCCCAACTTGAGCACCGGCCCTTTCCTGCATGCCCCTAACCCTCGCAACGCTAAACAGTGAAAAAAAAAAAAAGACAAAAACAAAAAGCATCTCAACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACACGGATCCGCGTTCAGAAAGGCGTGCACTTCCTACGCCTGATCCCCCGCATCGCAACCTCGCAGCTTCCCCGGCGTGCAGCGCTCATTTACCAATTCCCTTCCTGGGAGTTGCGGCTTCCCTCGCTCGGCCCCACTCCCGTTTACCCTTTCCCCAGCTCCCGCCTTAGCCAGGGGCTTCCCCGCCTGCCGCTAGGGCTCGGGCCGAAGCGCCGCTCAGCGCCAGCCTGCCGCTCCCCGGGCTCCACTTTC|ACTTTCGGTCCTGGGGGAGCTAGGCCGGCGGCAGTGGTGGTGGCGGCGGCGCAAGGGTGAGGGCGGCCCCAGAACCCCAG|GTACAGCGCGCTCGAGCCGCGGGTAGGGGACTGCGGGCCGGGAGGAGAGCGCGGCACCCGCCCCTTCCCTGCGCCCGTCAAGTGGGGGGCTGAGGGCCTGGGGCACGGGAGGAGGGAGACGGGGCACGGGAGGAGGGAGACGGGGCGCGGGAGGAAGGCGACGGGGCGCGGGAGGAAGGCGACGGGGCGCGAGACAGGGCGCGGGAAGGGCGGGGGGAGTCGCTGGCTAGGCCCGAGTCCGCGGGGTGCCCGGCGGGTTGGCGGCGGGCCCACCCCTGCCGGTCCCTGTCCCTGTCCCTCCGGGCGCGTGGCCGGTGCGCCTGCTTCACGGGTCTCCCCGCTGTTCGGCCGGCGGGAGCCTCCCTCAGCGCTCCGCCTGGCGCCTGGATGCCTGCCAGTCCTGCAGGCCACCGACGCCCGCGCGAGGCCAAAAGGCGGGGTGGGGCGGGCAGCTGGCTCGGGCTGAGGAGGGCACCTGCCCATAGCTGCTAGAGAAACCCAGAGGCTTTGGGTTAAAGACTCTGGTGGGGTGGGATGCGCGGGCCGTGTGTGTTCTTAAGGTCACTTCCCTCCCTGCTTCTCCTGTTCTTCTGGTCAGCAATTCTCTCTCTCCCCTTCGCTCTGGCTCTGGCTGGGTTTTATTCAGATAAAGCACCTCTGTTGACGCAAATTAAAAGTTTCCTATCTGGGTGCCTCACTGGCCAGGTGGTCCTACAAAGTTAATTCCATGAGGGGAAGGGGGAGAGCACACACTTCCTCACGCTTTTGGATTTCTTTGTGTAGGCTAGGTTCAGAAAGAAATTATCTGTTTCCTATTAAACACCCAGAGGATTCGCTCTGAACTCAGGACGTGGTCAACAATTAACAAAACAACAAAACAAAACAAAACAAAACAAAAAACTTGAAAATTGGGCACAGTTGTCTCTTGCCTGAGGATTTTTAATTAGTATAAGTAGCACATTTTCAGGTGCGGCCTGAATAGAAACATTCTAGTACTTTTTTTTTTTTCAAATTAATCCAGCATTTTTATTATTTACCAACAGTGCTTGTTAATTTCATTGTTCAGGAAATTCTGGAAGAACCTCAATTACTTCTTGATGATCTATTTCATATACTATAGTGCCCCAATAAAAGGAAGGGAGGCAGAGGTTGCAGTGAGCCAAGATCGCACCACTGCACTCCAGCCTGGGAGACAGAGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAAAAAAGAGGGATCTTGGTGAAGCTGAAAAAGCAGGGAGTTTATACTCACACAGATGTGGATTGCATTCCAACAAGTTGTGTGAACTTAGCAAAGTTACATGAATGGTTCTTTGCTTCAGTTACCCCACCTTGAATGAGAATAATAGGCTATTAGAGAGAGTGAGCATGTGTAAAGTGCCTGGCACTTTGGAGGAGCTTAGTAAATATTAGTTCTTTTTCCTCCTTGGCTTCTATTCCTTTAAGTGTTAGTGGAGTGTAACTTCAAAGAGAATTGTACTTCTCTTGGGAAGAGCTGCTTATATTGAGTAGACTACTGTTTTTGAGAGCTTTCTTTTTTTTTAACTGCCTCCCTTTCAGAAAATTGTTAGTAAAACCAGACCCTAGGAGACCAGCCAGAACCATGAAATGCCATGTTTCAAACTGGAACACTTACTATTGGAAGCAAAGCCAAACAAGAGCTAATTTTTCAGGGGAAGAGCCAAACACACGAACATGATCACAGAATCTTGGATGTAGCCTACAGTTTGGGATTAATAAGAGAATTTATTAGTGAAGCCCTTTATTCACTACATGGAGTTTTTACCAAGCCCCACTCATGCACTGCATCCTCGTTGAGACATAACTGTTTCTCTTTGGACCCCTCATGGACCCAACCCTGCAAAGCCTCTGATCCAAGGTCCCGGTACCAACCCCTTCCGCAGCACATCAGCCTTTCTGTCAGCTCATAACGAGTTGGAATTTCTAGATCTTCTCTGGGGCTGTTGGAGAGGTCTCGGGGACTTTCAGAGTCCTTACATGCTTGAACCTGCCACCTTCACAGAGTCCTCTGGATCCCGTCTTGGGGCAGGGGCGATGCTCATTAAGCTGTTGCTGCCAGTAATTCCATATGGAAAGCAAAACACAAGTTCCATTTACTCTCTAGTTCCCCAACTTCAAGGGCAAAAAAATGTTCTCCCTGTTCACACTTCCTGTCTCACCTGGGTGGTGCCTTTTGAACTGGGATTATGAGATTTCCAAGACTCTCTCTAATGTGTAGGTATCCTTTCTGTTTAGCCTCCAGATTGCTCCAGAGGTGAGGAGAAGGGAATTCCCTTGAGCTGTGCATTTGGGAAGGGAGCAAGGAAGTCAGGGGTTAGGGAAGGCACTTCAGCCATTGCCTTGAATTAGTATCCTATCACATAGAGTTGAAGGGGGAAAGCCAGGATTTGGCAAGGATGAGCTTTTCAACCTTGGCTTCTCAGTAAAATCTCTGGACAGTTTTTTTTTTTTTTAAAAAAAAAAACCAAAAACCAGAAATTAAACAATCCGTAAAACCATACCCCGATCTTATCACCATAGGTTCCAGTTTAATTGTTGTCAATAAGGACCCAGGCATCGAAAATTTTAAAAGCTTCCCAGGTTACATTAATATGCAGCCAGAGTTAGGAAACTTGACATCTCAGAGAGGGAGAATTCCATGTACTGTGAACACTTTGAGGGATCCACATTGTAAGCTTGCTACTTTTCCCAACTGGAACACGAGGAGTTTGGGCCAATCACTTGCATTCACCTGGTATGGGCCCCTCTTGGCAAACACCCATTAGAAAGGTGCGTTTGTATAAAAGAAATAAAAACTTATGTTTGATGCTTGGGGCATGGTTTGCCAACTTCCTTAAAATTCACATTGCCTTTTTTTTTTTTTTTTAAAGACAGAGTCTTGCTCTGTTGCCAGGCTGGAGTGCAGTGGTGCAATCTCAGCTCACTGCAACTCTGCCTCCCAGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCAAGTAATTAGGACTACAGGTGCGCACCACCACACCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGATTTCGCCATGTTGGCCAGGATGGTCTCAATCTGTTGACCTTGTGATCTGCCCACCTCATCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCATGCCTGGCCTGAAATTCACATTGCTTTTATGTCTTTAAAATCAATCCAGGTGATTGATACGTTTGCCACAAACTACTGGGAAAATTAAGCTCTTTAAGCTCTTCCTGGCTAAATAAATAGGTAATTAACTTTTGGCATGACAATTTGAGGAAGACACTGATGTTATTAAAGGTTACCATTACATATACTTACAGGGGAAGTGGAGACCTCGTTCTAGTGGTGGCTGCCACTGTGTCAGCATTTGATTTCCAGGGTACTAGGGTGACTTTCTCCAAGGTCCAGTGTCAGTGGGGAGTGGTGCTTGATCAGATGTTCCTCTGATATGGTTCTGGTTTTGCTTCTTTCATGAGCCTGGTTTCCCAAATGTCCTGCAACTCTGTGACCCGTGTAGTGAGCCACTCAGGATCCCCTAATGATTCCTTTTCTGCCTATATCAGCCAGAGCTTGTTCGGTTGCTTTCAACCAAGAACCCTGACAGGTAGAGTTAATTTAAACTTTGAACATCAAATGATGCTTTCTAGTACGTGTTAATGATTGTTGCTAACTGTAAACATCTTTCTTATATGAAACCATAACATAGGGAAGGTCCTTTTACTTTCAGGAAAAGACCTAGTACTTTTGGAAGTTTATGCCTATTTCTGTGAATGCTGGGTGGATACATTCTGAAATTATGCTGTGTCAATAACATTTTAATGACATATATTTTTGCTTTTGTACATTTGTGCCGCTAG|GTAGGTAGAGCAAGAAGATGGTGTTTCTGCCCCTCAAATGGTCCCTTGCAACCATGTCATTTCTACTTTCCTCACTGTTGGCTCTCTTAACTGTGTCCACTCCTTCATGGTGTCAGAGCACTGAAGCATCTCCAAAACGTAGTGATGGGACACCATTTCCTTGGAATAAAATACGACTTCCTGAGTACGTCATCCCAGTTCATTATGATCTCTTGATCCATGCAAACCTTACCACGCTGACCTTCTGGGGAACCACGAAAGTAGAAATCACAGCCAGTCAGCCCACCAGCACCATCATCCTGCATAGTCACCACCTGCAGATATCTAGGGCCACCCTCAGGAAGGGAGCTGGAGAGAGGCTATCGGAAGAACCCCTGCAGGTCCTGGAACACCCCCGTCAGGAGCAAATTGCACTGCTGGCTCCCGAGCCCCTCCTTGTCGGGCTCCCGTACACAGTTGTCATTCACTATGCTGGCAATCTTTCGGAGACTTTCCACGGATTTTACAAAAGCACCTACAGAACCAAGGAAGGGGAACTGAG|GTATTTTTTTTTCTCTTTTTCTTTTAAACTGCAAGTGCTGCCCACGCTAAATTCATTATTTCAGATTGATTGTCTTTTAAAATTCCCTTTGCTGTTGAACTTTTTCTTCAGTTTTGCTTTTGCATCTTCTTTATAGTGTTAAAAATGGCTTTTTCCCTTGCTTTTTAAATCTCATTTTAAAATTCTATTTTAACCAATTTTCTTTCCCCCAGCTCTATCAGAGTAAATATCTATTTGTTTATTTGGTTCGATTTCTGAGACATAATAAACATGTTTAATTTTCCTGAACTGTGTATTAGTTTCCTAGGGCTGTTGTAACAAAGTACCACAGACTGGGTAGCTATAAACAACAAAATGTATTCTCTCTCAGGTCTGGAGGCTAGAAGTCTGAAATCAAGGTGTCAGCAGGCCCGTGCTCCCTCCAGACTCTGGGTAGAATCCTTCCTTATATCTTCCTAGCATCTAGTGGTGGCCGTGGATCCCTGGTACTCCATGCCTAGCACCTGCGTCATTCTAGTTTCTCCCTCTGTTAGTCGCATGGCCATTCTATTTTCCTATGTCCCAGTCTCCATCTTCTTATAAGGAAACCAGACATACCAGATTAGGGCCCAGCCTGGTGGCCTCTTCTTCACTCCATGATACTTGTAAAGACCCTATGTCCAAATAAGGTCACATGCACAGATACTACAGGTTAGGACTTCAGCAAATCCTACTAGCAACATATTAGAGGAAATACTTTTATCTCAGTTAAAACTTTTTTAGAGATCTCTTCTCACCTTGCTTTGGTTCTGTTTTTAAGGAGGAGACTTATTTGGGGGAGATTTTATGCTCAGTTTTAAAATGGAATTTTATTTGTTGGTAGATTATACTAATTTATTTTTCAAATTCCATATTATTTTATCAAGGTAAGAAAGTAAAATTTATTTCACTCATAGCCCCCTGAACTGACCACTACTTCTATTTCACTGGAATATTCTGCCAGACTTCTTTCTGGATATGCATATATATTATTTTATATAAATAGAATTCTTATATTTTCTCTTTTACAAGGAAATTTTTTAACTTAATATGTTGGGAAGATGTTTTATATAAATGACTATTACTAGACATCTTTTTAATGATTTTATTAAAATACATAGTTGTACATTAATATCTTTAATCCCTCGGGAATGGAAATGAAATTGTTTCCAATTTTTCAGTATCAACAACACTTTGATGAGCATTTTTGTAAATTATTTCTTTAGAATAAATGCCTAGAAGTAAAATTGGTAAGCCAAAGAGCCTATAAATTTTTGATAAACTTTGTCATTTTTTCTTACCCCTCACCAATAGTGTATATTGGCAGTCTTTTTAGAGCAAAAAAGGTCCCCAAATAAATGATCTCTTTTTAATTTACGTAACTTTGATTAAGGAAGTTGAGCAGCTTTTGATATGTTTGTTAATGATTTATAGGTTCTTCTTTTATGAATTGCCTGTTAATGACCTTTGACTTTGCCTGAGATTCCCTTGGTTGTTGGGGATTTTTTTTTTTTTTTTTTTTTTTTTTTTGTCGAGACGGAGTCTTGCTCTGTCACCCAGGCTGGAGTGCAGTGGCGCGAACTCGGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGACGCGCACCACCACACCCAGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGGTGGTCTCGAACTCCTGTCCTCGTGATCCGCCCGCCTAGGCCTCTCAAAGTGCCAAGATTACAGGTATGAGCCATTTTTAGGATTACATATTTTTAGGATCTCACTATCTGTTAAGGCTATTAAGTTTTCCTCACATACTTCTTAAATGAATTTTCCCTGTTTTTTACTTTTTCTTTTTAACTTTTTTTTTTTTCTTCCCGAGACAGGGTCTGGTTCTGTCGCCCAGGCTGGAGTGCAATGGCGCAATCTCAGGTCACTGAAACCTCTGCCTCCTGGGCTCAAACCATCGTCTCACCTCTGCCTCCCAAGTAGCTGGGACTACAGGCCTGCACCACCATGCCTGGCTAATTTTTGTATTTTTGGTAGAGATGGGGTTTTACCATGTTGCCCAGGTAGGTCTCACACTCCTGGGCTCAAGTAATCCTCCCACCTCAACCTCCCAAAAATGTGTTAGGATTACAGGCATGAGACACCATGACCATGCCCAGTGTAACTCGTCTTTTAACTTAGTTTATGATAGCTTTTGTCAGATGAAATATTTTTTAGTAGCAAAATCCATCAATCTTATAGTAACTTTAGGAATTAAAAAGCAAACACCTGTTTTAGAGTTCCTGGTGCTTCATGCTCAATTTTTATTTCACTTGCCTTAATTTTAG|GATACTAGCATCAACACAATTTGAACCCACTGCAGCTAGAATGGCCTTTCCCTGCTTTGATGAACCTGCCTTCAAAGCAAGTTTCTCAATCAAAATTAGAAGAGAGCCAAGGCACCTAGCCATCTCCAATATGCCATTG|GTGAGTCTGCACTCCTGTGTATTTTCTATAGGAAAATCTACTGATTCTCTGTGACTTGCACTAGCCCAGTGACAGTCAACATTGGGTCACCTGTTTTGTTTTATTGCCTGGCAGATCGTTACTAACTTTTCATTTATAACCTATGCTTTTGTTTCAAGCCATAGTTATATGTAATCAAAGTAAAAATTGCACCTAAAAATGCAAGATTTCAGTAACAGTGCCATTCCAGGTTATACATGCTGATAGGAGGGAAGTGGTATAAGAAATTCAGGTCAGGTTTAAATATTAGTGCCCTTCACAAAGCACTTTCACCCTCATTTTCTCATATGATCTTTTAAAATGATTTTAAAGGTAATGCTTTGTCAATAAGGCCATATTTTTAGCATACAGTTATTTTCTCTAAGTTACATATACTATATAGTAATATTCATTATATAATTTGTAACTGAATGTACAAAATTGGGCAGACAGAAAAAGAGAATAAAAGTAATCTTTTCCAAATATTATGGTGCTGAAGGTAAGTCATGATAGATAGCTTAGCTTCCAGAGGGAAACTATTATTCCCCAATCTCAATGCGGATGTGGACAGCATTCCCTCTGATTTTTAAAAGTGACTAGAAGATGACCATGCCAAATGAATAAAACTGTTCAGTAAGTGCCATCATCCTTTGATTCTGGTAGTTTAGAAAAGCATCAGCTGGGCCGTCATTCTGCAGCTGGTATATAACACCTCCTGGAAGCACATCCTTTGTTCAGAGAAACTCACTGGGGATCAGAGTCAGAGTAGAATAGGCTTTGCCTAGAGTCCTGAGGGAAGAACAGCTTTGTCCCTGTGCTGACCGGGGAAGCAATATCATAACATGGAGAGATACTGAGAGCCACAGACCAACCTCTAGTGTGGTGCTTCTCAACCTTAAAACTCTATACCTACCTGCCTTCATACCATAAGGATGCCTTCCTCAAACAAGATTCGGATCTCCCACCCCACGCCAGAGGGCTATCCTCTGTAAAAATCACTCTTCTGCAAATTCCTACCAGCCAAGAACTTCTGTCCCCACTCCCACCCTTGTATATGAAAAGACAAGAAACAATTATGCTATTTTCCTAATATAAATTTAATATACAGGATGTGCTTTTTCAAAATATAGTCCTCTCCCCCAGTATTGTGGTATTACTCCCTGGGACAGAAGTGAGTTCTTTAATTGGTGAATCAAAGTTCTAGGAGAATAAAGGACCAGGGATGGGAAGGAGACAGGAGAGAGACTGAAGGACCAAACAGGATTAGTGGAGAAATTTGTAGGCTTTCAGAAGGGAGGCCTGGAGCTTTGGAAGCGCCACAAAGATGCTACAGTCTAAATCCATGGATATCCAGGATCCACTTAGTGAAGATAGGAAAACTTCTTTTTTTTTTTTGAGGATAGGAAAACTTCTAATGCAATGTTGTCCCTTTGGACGGGAATACCTCTCACTAACAGAAATCTAATACGAGTAGCCTGACCTCAGGCTGCAGATATTGAGCTGAGGGGAGAACAATGGGGTCTCAAAAGATCTTTTTGGAGACCAGAAAAACACAATATATACCATTGGAACATTGAAGCTTTTGGGCATGGGGCAGAAATTAATCACATTTAAATTTGAATTAATTTAATCAGGTTATTTTCCTAATAATTAACACAACTCGAGAATGGAAATTTTTGGCCAGGTGTGGTGGCTCATGACTGTAATCTCGGCACTTTGGGAGGCTGAGGCAGGTGGATAACCTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTAAAACCCTGTCTCTACAAAAATACAAAATTAGCTGGGCGTGGTGGCACATGTCTGTAATCTTAGCTACTTGGGGGGCTGAGGCAGGAGAGTCGCTTGAACTCTGGAGGTGGAGGTTGCAGTGAGTCAAGATTGTGCCATTGCACTCTAGCCTGGGTGACAGAGTGAGACTCCATCTCAAAAAAAAAAAAAAGGAAATTTTTGTTGTAGGTAGGCAGAAGCAGAATGCATTTAAAAAGAAAAGATGATTTGGGATCCTTTATGAGTAATCCTAGGCTGGGTAGCAGAGTTGGTTTGAATGACCAAATAGTGACCAGAAGTTGGTGGCTGATGGGTATTAAGAAGGATGAGGGCCAGGTGAGGTGGCTTATGGTTGTAATCCTAACACTTTGGGAGGCAGAAGAAGAGGATTTCTTGAGGTCAGGAGTCCAAGACCAGCCAGGGCAACATAGCAAGACCCTATCTCTCAAAACAAAAAAAAAAGATGAGGTCAGAGCAATAGAGGTAAGTATTGGATTACAGGAAAAATGCCCGTGACCATGGTTTCACCCAGCTAATTCTGGCTGGTTCTTTTTCCATCTCCGTGCTTTTTATTGCTGACGTGTTAGACTTTCTTCTTTAGGGGCAGACCTCTAAGACTGTACCTCCATCAACTATACCCCACCCTTACTCTCTGATTGCACTTAAAAAGGTGATTCCAATGAAGCAAATGAAGCAAATCTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCATCCAGGCTGGAGTGCAGTGGCGTGATCTCGGCTCACTGCAAGCTCTGCCTTCCGGGTTCATGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCACCTGCCACCACGCCCGGCTAATTTTTCATATTTTTAGTAGAGATGGGGTTCCACCGTGTTAGCCAGGATGGTCTCAATCTCCTTACCTTGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACTGTGTCCGGCCCAGGTTACTTTCAGTTATACAGCAGAACAGAAGCTCTTTTAGGTACTACAGGGTTATATATTTTCCCCGTTGCGTATATGCTCAACAGCTCGACATTGCATTGCCAGATAATTCTCAAACCTGTATTTAAGGAAAAGTGGATCAGCCACATCTTGGCAAAACTCACAATTTCAGTTTTGCTTTGTCTCATCCGTGTTATCAATCCACATATGCCAAATGTGGATTTACAGTGTATTGTAAACTTTAAAATGGTAAGTTGTATGGTATATGAATTATATCTCAATAAAAAAGAAATTGAGTAGAACTGTTTGACGTTAATGTCTAAATTATAATTAGACATTGGAAAGATAACTTTTAAAGTAACTATAGAAGCGTCATTAGACAGGGTCTGGCTCTGTCATCCAGGCAGGAGTGCAGTGGCTCAATCTTGGCTCACTGCAACCTCCACCTCCCAGGCTCAAGCCATCCTTCCACCTCAGCCTCCCCAGTAGCTATGACTACAGGCACGCACCACCAGCAGGACTAATTTTTGTATTTTTTTTGTAGAGATAGGGTTTCACCATGTTGCTTAGGCTGGTCGCAAACTCCTGAGCTCAAGCATTCTGCCTACCTCGGACTCCCCAAGTGCTGGGATTGATACACTTTTAATATTATGTCTGATAATTAGGAAATTTATCATGTTCACTGTATTGGATAATTGGATTACTTGATAATTTGAATTATTCTGATTTTAG|GTGAAATCTGTGACTGTTGCTGAAGGACTCATAGAAGACCATTTTGATGTCACTGTGAAGATGAGCACCTATCTGGTGGCCTTCATCATTTCAGATTTTGAGTCTGTCAGCAAGATAACCAAGAGTGGAGTCAAG|GTGAGCCTATGACTGTCACATATGGTGACCAGCTTGTTCTGGTTTGCTTGGAACTGGTTTTAAAACTGGAAGTCTGCCTGAGCGCAGTGGGTCGTGCGTGTAAACCCAACATAAACCCAACAGTTTGGGAGGCTGAGGTGGAAGAATCACTTGAGGCCAGGGGTTTGAGACCAGCCTGGACAAAATAGTGAGAACCTGTCTCTGCAAAAAATAAAATAAAAAAATTAGCCAGGCATGGTTCCTTGTGCCTGCTACTAGTCCTAGCTACTAGGGAGGATCCCTTGAGCCCAGGAGTTTGAGGCTTCAATGAGGTATGATTGTGCACTCCAGCCTGGGCAACAGAGCAAAACCATGTCTCTAAACAAACAAACAAAGACAAAACCAAATACCAAAATCCTGGAAGTCCTGCATCCTGGGAACCTTCTCAATCTCAGGCAAACTGGGATGGTTGGCCAGCCTGTTGTCACGGATGCTCATTTGTATAGTGAGGTTCTAATAACAACAACGTGGAGAGAGTGTGGCCTGGCCTGAGTCATGATCCTGGCTTCACTGCAGTCACTTCACTGACTCTCTGACCTTGGCCCTATTCCCTCTGAAACTTAGTATTTACTTCTTTGGAAGGTATAACTTGGACTAGATCCTGCAATGGTCTCTAAGGTTGCTTCTGGTTATGGCTTTCTGCAGTTTGGAAGTAAATGTTACTATCTGGCAGGGGATTTCTGGCTATGGTAAGGAAGATAGAGCAACCTGCTTGGAATACCCAAAGGCTTTGGGCCAGGTACACTGGAATGCTGGAGAGAAAAATCTTGTTTCAAGGCACACTTGTTCCTCATTTGGGTACTGTTGCATAGTGGGCAACCTATTCAACTGTGTGCCGTAGCTCAGAATGCAAACAGGTTTTTCTGAGGGGAGGAAGGGATGCTTTGTTTGAAGATACCTTATGTGTTTGTGCTGGTTTTCACTGAGGCCTGAATAGATGGGGATTCCCTGCTGAATTGCTTTGTGTTCCTCTAGTGCTGAGATTTCTTATTCTTGTGGAGGTATCTTTACTTAACTGGGGATTTGAAGGTGACGCTTGAGACTCGGATGAAGGGAACATTCTTAATTCAGCAGTGAAACTATCAGCAAAAACACCCGCCCATTGCTTTGCCACTTATCTGAATCTCTTAGAAATGATTATTTTAGTAATGTCTAATCTATATTAATATTTTTAATTCTTTCATTTCTTTAAACACATTAAGCATACAATTATATATCTGTGTCTGGTAATTGTTTTATCTGAATTCTTTGTGTATCTGATTTTGTGGTTCGTTGTTTCTGCTGGCTCTTGCTTATGGTATCTTGTTTCCTTGTTTGTATTATGAATTATGTTTGTGAGCTTACGTTGCCTGAGTCTAAAGTGGATTATTCCAGAGAGAAATTGTATTTGCTCTTACAGAGTGTCTGGGAGTACTTACTGGTCCAGGGATCACTTTACTTGTAGTTTCCTTGAGAAAGGGTAGTTATTTCTAGTTTACCTTTACATTAAAGGCCTGGCCTTTGGGTACTAGCTTTATGCAGGGATTGTATGTCCTGTTAGACTTTCTACTTTGGGCAGGCCCTGGACTTGGTCTCTTAACTCCTGAGTCCTTCAATGACATAAGAACCAAAGCTCAAGTCCAGCTGTGTTGGGCTAGTGCCGGCAGGGTTAAAGCTGGCTGCAGTGCTCTCCTGACATCAGAGGGTCTAACTGTCATTTCACTTTGGCTTCTAAATCTTTCTTTCTCATTTGCCATCTTATAAACACATTTAAGAACACTTTATACATGTTATCCAGCATTTGTTGTTGTTTTCAGAAGGGGGATTAATCAGGAACAGTCAGTATTAATGCAAGAAATGGAATTCCCAATTATTTTCTTTAATATTGGCAACCATATCCCACAATATGAAGACATTAATGTCAGTCTTCTACACAATGTGGGGAGAGAAGCCAGTTAAGATATTTGAATTCCTTTCTGTGCCTTTCTCTTTAG|GTTTCTGTTTATGCTGTGCCAGACAAGATAAATCAAGCAGATTATGCACTGGATGCTGCGGTGACTCTTCTAGAATTTTATGAGGATTATTTCAGCATACCGTATCCCCTACCCAAACAAG|GTAGAGATTTTGCACAGATATTACACATGACATTTGATGAACACAGTCATAGATTTGTCATTATAATTGGCACATCCCTGTAGTTGCCTCAGCAGCCCCTCAAGCCACAAAAACCCCAGCAAGTGACAAACCTGCGGTTGATCTTTCTGAGCATCTCCTCACCCTTGATGAGTACAGTAACTTCTAGTGATAGTGAAGAAAGCAGATCTTCATAGAGTTCTTGAGGCATATGGCATGGGGACTCTTTTGCCTTCTGATTTTATTAGTGGGCAGACAGCAGAGGGAAGAGGCTACATTTTTTCTTTACTGGCACCTGCTTGGCAGGAACCCAGAGGATGCTCAACAAACTGTTTTGAATGAATAAATTTATATAGTGTAAGACAATCTGAATTTTCTTTCTTACACAAGCCCTTAAAACTATTGTATGTTTGATTTTTTAGGTATATGTGGGTCTTGGGCATCCAAAATAGAATGGATTATTATAATTGTTTAGTTTTTCAATTTCCAAACTCCTCATCAGAAGGTTAGAAATGGAGTCAAGAGGCCTGAAAAGGCGGGCACGGTGGCTCATGTCTTGTAATCTCAGCACTTTGGGAGGCCCAGGCAGGTGGATTACTTGAGCCCAGGAGTTTGAGGCCAGCCTGGGCAACATGGCAAAACCCTGTTTCCACAAAAGTACAAAAATATTAGCTGAGTGTGGAGGTGCACTCTTGTAGTCCCAGCTACTTGCAAGGCTGAGATGGGAGGATCACCTGAGCTTGGGAGGTTGAGGCTGCAGTGAGGTGTAATTATACCACTGCAAATGCACTCCAGTATGGGTGACAGAGTGGGACCTTGTTTCCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGCCTGAGATGCAAGTCTGACTTTGCCACATTTATAGCAAAGTGATGCTGAGTCACTAAGCCTCTTTTTCTCCATTGTGAAAGGTTCTTCCAGTCTAGAGCTCCATGATTATGCACGGGCAGCTGGTTCAAACACCTATCCATTCTGAAGATTAGTGTTTGGGAGAATGTATAGCTTAGAGACTGGTAATGTATTTTATTACTTCCTTCCCAAG|ATCTTGCTGCTATTCCCGACTTTCAGTCTGGTGCTATGGAAAACTGGGGACTGACAACATATAGAGAATCTGCTCTGTTGTTTGATGCAGAAAAGTCTTCTGCATCAAGTAAGCTTGGCATCACAATGACTGTGGCCCATGAACTGGCTCACCAG|GTATAAGCTCATTCACACTTTTAATAAAGTATAAACTACATTTATATTGCTTCTATGGGACATATAAGGCTATTTATATAATTTTTACTTTGTCTTTTTTTAATAGGAAAAATTGTTTCTCCAAAGCATTCGTTTTTATGTCTTATAATGCATGTTGAACTTTTTTTATTTTTACCTTGATTAAATATTGGTCCTGTAAATATATGTTAACATTCATAAACTTATATTGGACATCTAAAATATACTTCTTTCTGAGTGTCTTTGTTTATGGCTTATGTTGTGCTTTTAG|TGGTTTGGGAACCTGGTCACTATGGAATGGTGGAATGATCTTTGGCTAAATGAAGGATTTGCCAAATTTATGGAGTTTGTGTCTGTCAGTGTGACCCATCCTGAACTGAAAGTT|GTAAGTAGTTATTTATCCTTCACATTTGAGGTTAATTTGTTGTTTTGTTCAATATTGCTGGAAAATATTCACTAATCTTTGATTATAGAACTTATAAAAATATTTCACTGATAACTTCCTTGACAGTTTAGATATGAATCGTGTTGCAAAAATGCTAGTGAACTTTCAAGATATACCACAAAGACTTATTTACAAACTCAGTTGTGAATCTATGATTGTTTGTAAATTGTCCGATTTTATATTTCTTAATATCAAAGAAATAGAAATAATGGCAGCCTAAATGTTCCATTTCATTTTCCCAACCTTCAGGTTGCTCATTGCAGAATTATTAGGTACAGATTACTGATATCTCAATAGGACCCTACTTGCCAAATCAAATGAGTTATACTTAAGGTAACTGCACATTTGATTGTATAACAACCTAGGCTTCTGGATCACAAATTTATTGCTTTGGAACTGCTATATGGTTTATTTTTTAAATCACCACATTTAACTTAAGAAAATCACATATAGCAAAATTAAACTTATGTAAAAATTTTTTTATCCTACTCCAATGATTCTTTTCATTTGTAAATATTTATTCCTATTTTCATCTGCCTGTATACATTATTTAAAATATACATTTGTGAATATTTATTACTGCCTATTTTCATCTGCCTGTGTACATAATTAGAAAGTACACATCAATATTGCATTATAATCCTAAATATTTTCTTATGTTTCTACATGATCTTTAGTAATAAAAATGATTATGGATACATATTGTCCTGTTGAGCTAATGTGCTCTAATAAAGCTGGTTAAGGTTCTCCATTTATTTTCATTATGTTTTTAAAGGTAAGTATCTTTGCTCTAAAACAGTACAGACAATGATTGGAAATGTTGAAATTACTATACAGTTAATTTCTTGTTGTGTTGCTGTTTGGCTATAGGCATAATTGTTTCGTTTTTAGATTAATACGAAATTTTCTTTATTCTAAAGGACTTAAGATGAACAAAATCTATAATGTTTAACTAAAATCATTGTTTCTTGGGTAGCTTTTAGAATATATTAATTCTATTTTGTTTCATAAATATTAGGAAATATGGAATAGGTTGCCCCCAAAATGTGAAGTATGGACTTCTTGCCTCAGATAAAATGTCCCACCTTTGACATTTTTTATCTAAATGTAAATCATAGGTGATGTTTTCTTTTTTCTATCTCAATAG|GGAGATTATTTCTTTGGCAAATGTTTTGACGCAATGGAGGTAGATGCTTTAAATTCCTCACACCCTGTGTCTACACCTGTGGAAAATCCTGCTCAGATCCGGGAGATGTTTGATGATGTTTCTTATGATAAG|GTAAAAGTAGATTGAGTATAAGGATACAGTTTAGATACTAAAGTTATACATACTGGGGTGGAGAAGTTATAGGCAAGGTTGTGGGGTTAAACCCAGATTGAATGCCTTCTCTCTTGACACGTGCTGGCTGGAGTCAACTCTTTTAGGACTAACTTGCAGTTTGGCTCATAACCCTAAAGATTATTTTATGGGAAATTCTTCATATATTCTTTCTTGGGTTGAAAATTCATGGCTTACAGAAACTCTGCTTTTATTCATCATTCAACAAATAGGTATTGATTGAGCATCAACTTTGTACTAGACAAAAATTCCTGCCCTCGTGTAGCTTACTTTTCAAGGCCTTCAGCAATGGTTAATATTGTTGAGACGCAAATAATTGTCTTGCACAGTGTGCTTAGTAACAGAGTTGGGAATTGTTGACAACAGTTTAATGATTGGGAGATTTTATGTAAAATCCAGATTTCTAGCTTCTCTTGGGGAAGAAAAAGGAGGATTTGGCCACTGTATGTTCTTTGCAGCATAAGCTGGAGCTAAGTTGTTGTTCTCTCTTTGTAAGATCAAGGCTCTGCTTTTCCACTTTCCCTGCCATTCCCAACTGTTCTATTGTCTTCTCACCGAGGCTGAGAGTGTGTTGCCAGTTACCATTGTGCTTGGCTGTTGTTTTACCGGTAGCAAACAGAAAAGTCTTTCTTGCTTGCATGTTTCCATTGAAAGTCCAGGGGAAAAAGAATGTAAAAGAGCATTCTTCTTATCCTTGGACTACTTCCCTTATTTATATGCCCTGTCATGTGCCACTGGAGGCATTTGAGTTTGTGACTCACCATCCGTGGTAATGGGAGTGGAGGGGAAAAGAGCCCTTTACCAAGGAATACAGGGTGTCTGGGAAGACTCTTGTTCCCTTTCTCATTGTGACTCCAGCTCCATCAGCCCTCCATGCTCAAGGCTGCCTGGGCTCCCTGGACATATCCACTTTTCCTTCCCTGGCATCTACCTCTGCCTCCATCTCTAGTGCTCCACCCCTTGTTGTACTGGCCTCTCCTTAGTCCTGCCCTGGAATGGCAGTGGGAGAGCCAGGTAGTAGCTCAAGGTCCAATGTTTAATCTGCACCATTATCCCCACTCACATGTGAACAAAGGGAGTTGGCAGATGATGCTAATTTGCCCCATCGGGAGGTCTGGCTACTGATAGAAAATAAGGGCCTCAGTGGGCTCAGAGCATAAGCAATCACATTAGACAAATCTCCTGCCTAAACAGGTCCAGGTTTAACCTGCTTACTCTGTTTCACAAATTGCCAGACATTAACAGTGTTCCTGCAGTTGCGTTTTCAAAGAAATGTGTTTTATTGCAAAAGAATATGTGATTTCAGATGAGACTGCAATGAAACTATAGATAACAATTATTTCTATTATCTTTTCAG|GGAGCTTGTATTCTGAATATGCTAAGGGAGTATCTTAGTGCTGACGCATTTAAAAGTGGTATTGTACAGTATCTCCAGAAGCATAGCTATAAAAATACAAAAAACGAGGACCTGTGGGATAGTATGGCAAGT|GTGAGTATGTTTTTGAATATCTCTGCATTTGGGATTGACAGGCTTATCATCTTGTTTTGTTTTCCCTGCATTATGTTAATCCCTCTGAGGAGAATCATTGTTTTCTATAGAAATAAGAGTGATGTGTTTATTTTTGGTTTTTAG|ATTTGCCCTACAGATGGTGTAAAAGGGATGGATGGCTTTTGCTCTAGAAGTCAACATTCATCTTCATCCTCA|GTAAGTTTCTATATCTGTACATGTTCCCCCAAGCACATTCTTTTACTGCATATTCTTTGAAAGGCAGCTCTGTGCCAAACTTTCTGAGGTCCTTGATTATATCACCCTCATTCCAGATAAGACTGCATTTAAACTATTCCATACTCATAATCTTTTTCAATTTTTCTTAAAGTGTATCTATACTGGAGGGTTGCAGAGCTTTCCTTGGTAATGCTTCTCACTGATACTAATTTCTCTAGCTTCCCTTTTAAAGCAGTGGATTTATGACATGTTTCTATAGCAGATTACAGCTGCATTGTAGCAGTCAAAAGGATATGTCAGTCATTTACAGAGCTCTGCATTTGTACAAAGACAATGGCACTGAGCATTCTTGAATACTTGTCATGTGTCAGGCACATGTTAAGCACTTATATGTATTATCTCCTTTACTCTTCATAATAAACCTGTGAGCTGGGTACTATTACTATCCCTATTTTTAAAGTTGAGAACATGAAATACAAAGACATCTGATTGGTAAATTGCAGAGCAGAGATCTGAATTTAGATCTAACTCATGTTTTTAACTGCTAAGCTATAAAGTATTCATAACATCAAGTCACAAAACAGCCTGAGTCTCTGTTCATCTGGACTTGTGGGATGTTTTCAGAGGAAGCTGAGGGTGAGTCTGGAGATACAACAGAATTTTTTGTTTCTTTTTTCTTTTTATATGTTTACTTTCTTGGCTTTTTTGTTCCTAGAGACTGCTTTATTCAGTAGTTTCTAGATTTGTCCTGTGGCACATTTTATCTTTTTACTAATTCTTTTTTTTTTGAGACAGAGTCTCACTCTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTCGGCTCACTGTAGCCTCTGCCTCCCCGGTTCCAGCGATTCTCTTGCCTCAGCCTCCCAGGTAGCCGGGATTACAGGCACACGCCACTACACCCGGTTAATTTTTGTATTCTTAGTAGAGATGAGGTTTCACCATGTTCGCCAGGCTGGTCTTGAACTCCTGACCTCAGGTGATCCACCCAACTCAGACTCCGAAAGTGCTAGGATTACAGGCATGAGCCACCGCGCCCAGCCTTTACTACTTCTTAAATCATTTCTTAAAAGCTTTTTTAGTATCAAAAAACAGCTCCTTTTGAGTTCCCACTATTTGTTGAGTGTGGGTCATCTCTGTGTTCTCACTTTAGAAACACAGGCTCTTGACTGAACATTGTTTCCACCTTGCTTGTCCAAAACCAGCATAGTTAGGTAGGTATTGAAAACCTGGCACTTTCTCTCCCTTCTCTTCCTTCATTCATTCACATGCCTGCTTTGTGCCCAGTGTTATTGCCAGCCCCAAAGTGTGCCCGGCAGAACTAGGTATGCTACCTGTCCTCCAGACACGTATGTGTAGTGGAGGAAAATGACAAGCAAACAGCTGGTGCTCTAATGGAGGTGTGATGGCTGGGGACATCACACTGAGAAAGGGATCAGAAAGTGCTCCTAAGAGGGGAGGTTGGCAACTGGTAGCGGCTGTTTGAGCATAAGCTGGTGTTCTCTCTTTTCTTAGAGTTGGGTTAAATGGGTGATGTGTCTGCCTTTTTGTGTACACACCAG|CATTGGCATCAGGAAGGGGTGGATGTGAAAACCATGATGAACACTTGGACACTGCAGAAGGGTTTTCCCCTAATAACCATCACAGTGAGGGGGAGGAATGTACACATGAAGCAAGAGCACTACATGAAGGGCTCTGACGGCGCCCCGGACACTGG|GTAATGCTCCTAGAGTAAAATTTGTTTTGTTGTCTAGGTAACATCTGCCTTGTAGGATGGAACCTTGCTTTTGAAATAATGCCCTTACCACTATTGCTAAAATATTTCAGCTGCATCTGTGTATCCTATGAAGTTGACTTATACTCCCTGCCCCCATCTTCCCAGTAGGATTAAGGAGGCTTTAAACCTTGGTTATTCTCAGTAAAGGTGACGATGTAATTACTTTAACATTCTCATATTTTGTAATTTGATATGATGGTAATTTCTGGTTACTGGCTTGAAATCAACTCCAACCTAAGCAACTGCTACTAGATTACAATAGTGCCTAGCATTTGGTTGGAGCTGAGGACAAAAGAATTTAGGTGATTTCCGAGAATGATGAGAGACTCAGTTGTCTTCTTCTGAGTTAGATTTGGAACCTGTTTGTCAGCTTAATGCTATAGAAGAATATTATTAGAAACAAGATGCTGCAACTTGATTGACCCTGGATGGATACCGTTAAAAAATTCTTTTATCTTGAAACAATTTCAGACTTATAGAGAAGTTACAAGAATAACACAACAAATTCCTATATATCCTTTACCTAGATACATGTTAACTCTTTACTCCTTTACTTTCTTCACCTCCCTCTCTCTCTTGCTCTCTCTCTCTCTCTATATATATATGTATATGTTTGTGTGGATATATGTGTGTGTGTATATATACATATATATGTGTATATATATACACACACATATATATATATAATATATACACCTCCCTCCCCCATCTTCCCAAAAGGATTAAGGAGGTTTTAAATCTTGATTAATCGATATCCATCTATCTACATACATACCTATATACATATGTCTGTATGTATATACTTGGCATATGTTTTTCTGAACTGTTTGACAGTAAATTGGTGACATGATTCCCCTTCACACCTAAATGTTTCAGCATAGTATTTTCTACAAAACAACATTTATGGCCGGTCACAGTGGCTCATGCCTTTAATACCAGGACTTTGGGAAGCCGAGGCAGGAGGATCACCTGAGGTTAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACACCATCTCTACTAAAAATACAAAAATTAGCCGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCAGGAGGGTGAGGCAGGAGAATCGCTTGTACCCAGGAGGTGGAGGTTGCAGTGAGCCGAGATCGCACCACTGCACTCCAGCCTGGACAACAAGAGCGAAACTCCATCTCAAAACAAACAAACAAAAAACAAGAACATTCAGTTATATAACCATAGCCACAGTACAATTATCAAAATTGGGAAATAAACTTTATATAATACTATTATCTATAAATCCCATTCAACTTTTTCCAATTGTCCTAAAAATATCCTTATAGCAAAATAGAAAATAAATTCTGACTTAGGATTTAATACAGGATCACTTATTTTATTTAGTTGTTGCATCATTTTGGTCTCCATTTTTTTCTGAAACAACTCTTCAGTCTTTTTTGGGCTTCTCGATATTGACATTTTTTGAAGAGTACTGGCCAGTTATTTTATAGAATGCCTTTCAGTTTGGATTTTCTGCTGTTTCTCCATGATCAGGTTCAGCTTACATATCATTGACAGGAATCCCAAGGAGGTGATATTGTGTCCTTCCTAATATACTATTCAGGAGGCACATGAGTCCTGAAGTTAGGCCTCTGAGATGCAATCAGCCAATCACTGTAAAAAGGCTTTCAGGGTTCACGTGAGTCCAAATTCTTAGCTCTACTCAAGGAGTCCAAGAAAAGGAAATAGTGCCCGAGCTGACACTCCCTACAAAGCTGGAAAACAGACTGGGGATGTTTTGAGAGCTTGGCTGTAGCAGTTTTGGTACTATCTTGTTCTAAACTAACCAGTGCCTGGCACATGATAGGTGATTTAATAACTGCTTGTTGAATTGAATCAACAAATGAACAATCCACCTCTCCTTTATTTTAG|GTACCTGTGGCATGTTCCATTGACATTCATCACCAGCAAATCCGACATGGTCCATCGATTTTTGCTAAAAACAAAAACAG|GTAATTTATTTTGGAAACTACTAGTTAATTCAAGGAGGAATGTGAAAATGTGTAGGATTAAAGTGCTGTTTCTTTTGCTATTTATCTTGGTTAAAGCAAAAGGGATCAGATTGAAAATGCTTTCTGGTTTCAAAGAAGAATGGCCAATATCTTGCCAGATAGAGATTATAGCTGAATTATATAATGCTAAAAAGGAGGGATTACCAGTGAACATTCACTATTACTTTGTTAACATTACTGGGTTATATCATATGTATGGCCAGAGATGTAGTTGCCATTCTTTTCATAGATCTGTAAATCTCACAAAATTGATGTCTAAAGCTAGTGTTAGCTTTTGTCTATTGAGTTGCTGTTTTCTGGTTTCTGAGAGAAATAAGTGATGTTTTCAAATTTTTCAGTATTAAACTCTTCTCTAATTTCCTTGCCCAATCTTGGATGAGAAAAGCTTTCTTCAGGCAGGGGAGCAAATGGAACTTTGATTTATTTTATTTACTCTAG|ATGTGCTCATCCTCCCAGAAGAGGTGGAATGGATCAAATTTAATGTGGGCATGAATGGCTATTACATTGTGCATTACGAGGATGATGGATGGGACTCTTTGACTGGCCTTTTAAAAGGAACACACACAGCAGTCAGCAGTAATGATCGGGCGAGTCTCATTAACAATGCATTTCAGCTCGTCAG|GTAATACACGCTGCACAAAGTCGCGGTTTATTTCTGAAAGCAGCTGTTATTGTTCAAATTCTTGATTTCTAAAGACAAAAATGATTGATTGATAACAAGAAGTTGAAAGGTGTTTTCCTTTAAGTTCTTCTAACAACCCAAGATTGCTTTTAGTCTTATAATTAACCTCCTGTCTGTGACTACTAAGGCACTTGAAGGAAGGAATCTGTATCTTAATCTTTCTCATTAATAATTCAGATCATACTGAGGCGGAAGTGTAGATAACCAGGAGATCAACATCCTGGCGAAACTCCTTGCCTTGTCCCCGCGGCATGGTCCCGCAGCTTCTTCTGGCCCCGCTATACCGGATCAGGTTTCTCCCACCACTGGGTCTTTATAACAGCCTTGAAGTCTTCTTAGAAGACTAAGTGACACAGGCCCAAGTTGTTTGCATTACATTTCTTGGGTTAAAGAAGGGATTTTTTTTTCTTTCCTAAACCAGATACCTGAACTTTGCAGCATCTTTATGGAATATAGCTCATAAAATGTAGCCCAACCAAATGATCTCTGAGTGTGTGAAGACAGAATTAACACTACCCTTTTTTTTTCCTCCCCCATGACAATGGTTTTTAAGGAAATGCTCCCAAAGCCTAAAACTCAAACTTCTTCAACATGTAGGCAGACCTAAAGTCCTAAGAGGAAGCATGCAGGTGGGAAGGATTATCTCTTTCTCCTCATACCCGTATCCTTCTGGCCTAAATTTTGAGTGCCTTCTGGTTCTTCTCACCACCATACTGGGCCTCCTGAAGTGAGAAAAAGCAATGGGGGAGAAAGTTATGGGTTGCTTACCTAGCTGTTTCTTTCTGCCTAAAAAATTCCCCTCTGTAGAATATTTCTGCTTGAGCCTTAGAGGGCTTCTTCTTTTTTATTTTTTAACTTATATAATATATACCCTACCACTGAGGTTTTTAACACTAACCCTCTGTAATGAAGGGTTTCTAAGGATATGCAGTGTCTTTGAAATGGAGAAGAAAATGTGTTTCCTCCAGGAACTCATGGTTTTGACCAGATGATAGTCTGTTTGACCAGGCTTCCTAAAGGGTCTTTGATGGAAATCCCTTGCTTACTCTAGAAGTTCACCACTCAATCCAGTGTTTCACAGGGATAGATGCTGATGTGCTTCCTTTCCGGCTTATCCACGTTAGCCCTAGACGTTACTGTTTGAGAAACTTCCTCCCTAGTGTAAACATGCCGGATGTTGTGTTATATGTGATAGAAAAGAGACTTATATAGTAGTTATCTATCTGTTTGTTTATTTATTTTTTGGAGATGGAGTCTCACTCTGTCGCCCAGGCTGGAGTGCAGTGGTGCCATCTCAGCTCACCGCCTCTGCCTCCCGGGTTCAAGTGATTCTCCGGCCTCAGCCTCCCACATACCAAGTACCTGCCACCATGCCTGCCTATTTTATGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCCGACCTCAGGTGATCCACCTGCCTTGGCCTCCTAAAGTGCTGGGATTACAGGCATGAGCCACCGCGCCCGGCCGTAGTTATCTATTTATAAGGGACATTTATATCTGATTGCTTGTTTTGCTAGAAGATTAAAATATATTTTGAAGCAAAAAGTTGACTTCCCGGATTTACTTTTAAACCACTAACCACAGTGTCTCTTGGTCAG|CATTGGGAAGCTGTCCATTGAAAAGGCCTTGGATTTATCCCTGTACTTGAAACATGAAACTGAAATTATGCCCGTGTTTCAAGGTTTGAATGAGCTGATTCCTATGTATAAGTTAATGGAGAAAAGAGATATGAATGAAGTGGAAACTCAATTCAAG|GTAAAAGCCTGAAATAAAAGTTATGTAATTATTATTTGTGTTAAAAAGTGTTAATCATTGTGTGTGTATGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTATCTTTATATACATATTAAGGAAAAGCAAATAAATATTAGTTCAGTATTTAGATGGACTAGCAAGATTTTGGTTTTATTTGGCACTAAAAATAGGAATGGCATTCTTAGACTCTATTTAATTTGCATGGATTTGTTATTTCCTTCCTTTCACAAACCTTTTATTTCATTTTCCTTTCCTATTACAATGGCTGACGTTCAGATTTTCTGGGACTCTTCATGGTACTTGAGGAAGAGGCACAAAATTGTTATGCTTGGCAAAATGTCCTGAAGTCTTGTTGCATAATTTGCTCTCAAAATTTAGTCATGTAAACTCTCTTTGAGTTTGTTTTAAAAGTCGCAATTCATCCTGACTTTAAGTAGATGGATATCTTATAAAAGTTTGTTATGAAAATATGAAAGTCATTCATCATTATTTATTGTTTTTGGCCCTTTTATAATTAAACAGTGCTGCATGATTTATACAGTAAAAGTTAGATTATGCTTTAAAAATTAGCCCCCATCATCTGAGACCATAATGGATTATATTAACATGAAATGACCTGTGACAATACTGGTCCCTGTTTCCCTGTACAACGCCCTCAG|GCCTTCCTCATCAGGCTGCTAAGGGACCTCATTGATAAGCAGACATGGACAGACGAGGGCTCAGTCTCAGAGCGAATGCTGCGGAGTCAACTACTACTCCTCGCCTGTGTGCACAACTATCAGCCGTGCGTACAGAGGGCAGAAGGCTATTTCAGAAAGTGGAAGGAATCCAATGGAAACTTGAG|GTCAGTCCTTACTAAATAACCAATTTGTTGATGTGAAGGGCATCTTTTCTGTTTTCCATCATTGGTACTAAACATTAGGGAAAACAAAAAGTTTAAGTGTCTCCCCTGCTGCCCTTTTCTGGAAAATAAATTGCTTTTTAAGATTTATCTATGTATCTCGTAACTTTAAGAAATGCTAGGAGGGAACTTCTATGCATAAAAGTCAAATATCTGGGGAGTTAGGATGGTATGGGAATAATTCTCATTTTGTATAGGCAATGCAAAATCTTATTAAACGGTGACAGCCATGCCATAGAGAAACACATGCATTGTATTTTATAGTAGTTTCTTTGGGCTTCCAAATTCCCTGACAGGGATGCATAGTAATTGCTGACACTGTCTAAGCAAGGTAGGATTTTACCGTATCTGGAAATCCCTATTCTTGCTACACTTCAGTCCTTTTTACATTTGGAGCTTAAACCCCACCCAGGAAACATTTATGCCAACAACCTACATGCTTCAGCTTAAAGAAATCCAAACAGTGGTCCTACTGCCGTCCTGACTTGTGATCATGGTGTAAATTTTGAATATAGTTTGAATTTCTTTTGGCTTGAAAGGTGATCCCCTATGGAGTCACCCAGCATCATTAAAGTATTTAAATATGTAGGTATTTATAAAAATGGCATTTCACATTTTTGAGAAGGCTACTTAAAACCTAATTTTAGATATTTTTCTCTTGCCTTTTTTTTATAAAGTGTAACTAGGCCAGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGTGGATCACGAGGTCAGGGGATCAAGACCATCCTGGCTAAGACGGTGAAACCCCGTCTCTACTAAAATACAAAAAATGAGCCGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTAGGAGAATGGCGTGAACCCGGGAGGGCGGAGCTTGCAGGAACCGATATCGAGCCACGGCACTCCAGCCTGGGTGACAGAGCAAGACTCCGTCCTAAAAAAAAAAAAAAAAAAAATTGTAACTGAAGGAAATCAGTAGTTTTTGTTTATATTTCCAGGCATGCTTCTCTGATCATGGGGAAAGTTCAGCAGTTCATAGTTAGTCACCATTATGCCTCTAAACATACTTACTGCACCTCAGATTCTCACCGAGTGTGTTCCTTTCTATAG|CCTGCCTGTCGACGTGACCTTGGCAGTGTTTGCTGTGGGGGCCCAGAGCACAGAAGGCTGGGATTTTCTTTATAGTAAATATCAGTTTTCTTTGTCCAGTACTGAGAAAAGCCAAATTGAATTTGCCCTCTGCAGAACCCAAAATAAGGAAAAGCTTCAATG|GTGAGTCCGTCATTCATTCATGTTCATGTGGCCAAGGGAAATTAGATTAGATTAGATCATTCTGGCATCTATTTTTGTTTTCTTGGCCAGGAATTGCCTATCCTGCTGGGAACATACTGCAAGTCAGCACACATCTAATGAGAAAGGCAAATAATTAGGGAGAACCAGGTGTTAGGAAAAATATTTAAAGGTGTATACATTTGTGCATCTTTAAAATGACAACATATTAAAAATATTTACTAGAGCTACTCTTCCTTAACTATTCTTTTGAGCAAATGAGAGGGTATATGAGAATGGGTGTTATGAACACTATATAAATGTTAGTTTTTATTATGTAGCTATAGTGGGTTTATCAGGATTGTGGTTACCCATTTATCTTTTTAACCTTTGTTACTGGCAGACTTTTTTTTCTTTTACAAGTAATTTTATTGCAGTGATTCATAACTGTTTCATCACCTATTCATTGCTAACATTTCTTTTCTTAATCCTTCTAG|GCTACTAGATGAAAGCTTTAAGGGAGATAAAATAAAAACTCAGGAGTTTCCACAAATTCTTACACTCATTGGCAGGAACCCAGTAGGATACCCACTGGCCTGGCAATTTCTGAGGAAAAACTGGAACAAACTTGTACAAAA|GTAAGTGGTGCCAAAAATTGTGCTGTGACTGGATAAGTTCATAACCTTACTGTGTTTTAGCCTTGCTGTTTGTAAAAGAACAGTAACAGTCTAAAGGTACTTTTTGATTGAAGATAGGCAGTAGAAATACCTAAAATATTTGTAGAAAACATAAAACTGGACTTCAGTGCTAACTAGTGAATCTGGACAGGGATGTTTTCCATTCCATCTGGCATAACCCCTTCCTGAGCCCATGGACATATCTGAAGCCTTCCTCCTCACAGTTCAGCCCAGGCCTTCCATGAACACATTTGCTTGTTCACATCTGTCTTTGTCTAACTCTTATAGCATTTCCTGCTTCTGTCATTTTCTGTTGGATACTTAACCTTTTATTAGGCTGTTGGTGTGTATTATTCTTTACAGCTAGATCTTAACCCATTGGATAGACATCATATTTTGTATTTTTCACACCGATCAGTTTTTAGCTGAAAGCTATTATATATAGGAGGCCCTTAAAATATATGTTAAATGAATAAGTATTTCACAACCCGTTTTTGAATATTTCCCTCTCTAG|GTTTGAACTTGGCTCATCTTCCATAGCCCACATGGTAATGGGTACAACAAATCAATTCTCCACAAGAACACGGCTTGAAGAG|GTAAAAAAAAAAAATCTATATATATATTTTTAAACATAAATGAAAATTAGCTAATTAATATGGGGTAGACAAAATACTTTGAGGGTGTGGTGAGTTAGAAATGGATTGTCATTTAGAAGTTATTTTTGGATGCTATGGTGTTGACAGCAGCATAAATCAGTTGCAATTAAACTAGTGAAAACTGTGCCTCTTCTCAGACACGTTAAGAGGTCTTAGCTCTGCCAGTAAAAACCTTAGGACTTGAAGAAAATTACTTGAGACAATTAGTCCTTGTTTAAGGATGTTAAAAGTGGGCACTGAGGTATAAATGACTGAAGTGGTCATCCAGCTGTTGCTAAAGAGGCAGGTCCGGAATCTAGGTTCTTTATAAACCATCCAAGTTCATGGTTCCCTGTCACTTATTATGTAACCATCTACCTTCAGAAAGAATTTCAGGCAGCTTTAAGATCCATGTATAAAAGAAATGTTAAAATGAAGGACAAAAAGATACATAATGGACAGGTGATAACCATGTGAGGGACTTATGGCTGAGGATAATTCTCGCAATTGTCCCCTGAATTTATCACAGACATTCCTGGAAGTCAAGGAAAAATGAGGAATCAGTATGAGTTATTCTCATTGTCTGGTAAAAGAGAACATGAAGCATACACATTTTCTACAAAAGCAGACTTTTCTTCAGTCCTAAACTCGAGGACTTTGATGTGTGGGCCATTGAGTTCTGTGGTGTCCCTTCTATAATAAGGTTTCCTATAGTAAAGTTGTGGAACAAATTTTACAAGGCTCTTAAACTGTGGTACATTTAGATAAAAGCTGAGAGGCTAATATTAATTTTTTTAGGAGTGGTTCCATTGTGTAGTCTGGTTACACTATTTTCTAGCTTGAGATGGGGAGAGAGCTTTGGAAATGAAAAAGAATGAAGGGTTGTTACAGTCTCGTAGCTTTTTTCCAGTTTTCAGTAGCCTCATCCAGGCTTGTAAAATTAACTTGCATATAATAATTCAAGCTTGAACTCCAGCGAGGTCTAGAGCAGAGATACTCTGTCTGATGACTGAAGAGATGTCCAAAGCCTTGACCAGAAAGGTAGTCATCTATAGACAAGATCTATAGCAACAAAGCATTTCATTCATTCACTTGTATTCTCATTAATTCATTCAAGACATATATGCCACACATGGTTTTAGGTTATGGGGCTGTAACAGTGAACAGACAGACAAAACCTTTGCTGTCACGGAGCTGAAATTTCTGCTGTGGGAATCAGACAATTACAGACAAATCAACCAATGTCAAGTAGTTACATGCTCTGAAGATGAATAAGGCAAGGAGAGTGATTGGGATAGAGGGTTGGTGCTGTTTTATAGGATGATTAGGAGACATTTGAGAAGATATCGAAGGAAATGCCAACATCTAGAGGAGTATTGTGTGCAGAGGAAATATCAGGTGCGAAGATCCTGAAGCAGGGCATGAAAAGTAAAGAGCCCGAGATGTTATCATAATGGAGAGCAAACTACGGACATATAAAATACGGGAGTCATCCTGAATCCTTTCTTCCCCAGACATCTAACTACGCAAGTCCTGCCGGTCTTCCCTCAGCACATGCACTGGATCTTGCGCACTTCTCTCTGCTCTTACCCCCATTCAATCCTTTCTCATCTCTTGCCTTGCCTATTGCAATAGTGTCCTAACTGGCCTCCCCCTTGCCACTTTGCCAATAGCAAAACATTTTCCACATTGAAGACTGCAAGTCAGCCCATAACTGACCCTGGCTTCTTGAAACCCTGCCTATTGTTCTTGCAGTAAATTCCAAATCCCTCCTTCCCATGTGTCCATGACCTTGAAGACCTGGCTTCTGAGGACCTCTCTGGTGCCAATTCCCACAGGCGCTCCTGGCTTACTCTCTCATAACACTGTAGCCACTCAGCCTCCTTCTGCTCCTGGGCTACCACATGTTTCTCCTGTCTTTAAGCCTCTGCACTTTTTATTCTCTCTGCCTGGCTTGCCCATTCCCCAACTTTTCACTAGAAAGGGGCATTTTATCTTTGAGAGGTCTCGGCTCAGATCACCTTCCCCAAAACCATGCTGTTCAATGCAGGCCACATCTCCTCAGTTACTAGCATACCCCTGGCATATTTTCTTCATAGCAGTTTATTTACAGTAATCATATTTACTGCTTGTATACCCCAAAAGACTGTAAGCTCCATGAGGGGAGGATGCTTGCCTGATTCACTGCTGCATCCTCAGTGTCTAGCACAGTAATTACAGCAGAGGGATGAATAAATATCAGAGTAAATATTGGTGAACAAATGGATGAGGTTAAGTCCCTGATACATGCTGGGTTTGGGGCTTTGCACTTTACCTGCATTATATAACTACATCCTTCACCACTGCTCTTCATTTTACAAATGAGGAGACTGGCTTCCATAGAAGTTAAATCATTTGCTCAAATCAAGTTAGTAGGATAAACCTGTTTTCCTATCCCTGTATTGTGCTACTTATTGCACAGAAATTGTTCTTAAAGAGCCAAGTCTGAATCAAATATTCAGTGGAGATGTTGCATTTCCAAGGCAGGTGAAGACAGAAGAGATGATTTTGGGTCAGGACAAGGGTAAGAGTAATGGTTAACAGCTGGCTTCGCTGTTGTAGAAGGTCTGTTTTTAGTGGAGCTATCATGATGAGCTCCTTTAGGATTAATGATTTGAACAATTAGTAAAATTATTGATGTGCTTTGTTGAGTAGTGCTCAGTGATTTTTAATCGACTGTGAATTAATCTTGCATTCTGAGAGCGTATGAAACAGTAGGTTGAGCTACAACTTAAAGTAGAGAGTGAAGTGTTTTCCAGGTTGCTACAGCAGGAGGCCATGCCTTCTGCTCATTGTAAAGTTGCACTGTATTGACATATAATTTTAAAACAACTCTGCATCATTTAAAATTGAATTCTGATCTTTCTAAAACCCATTCCTACTCCCCTCTTTATTCCCAAACTAATAATATGGTATTATGACATGGATTTCTAAGGAACTGGCTGTCTGGAATCTATGCTGAATAAATAATACATCATGGTCTACATTCGCTTCCTGAATACCAAATCAAAAATTGATGGATTAATGCTGTGAAAATTTATGGGAAAAGGATAATAACCCTTTAAGGTGAAACAGAATCGCACAATGGTCAATCTTGTGGCAAAGCCAGCCTATCTGTCATGTGAGCTCAAGGGGTGAATTAAGATACCCACAAATGAAACAAGAACAAACATATTTTTCAGGAGGTAGCCAAGAGTTTCTTATCTCTTTAAAATCATGGGGAACTTTATGTGGATTTTATCTTGAGACAACAATACATGAATTGTAGATTAGGATAAAAAAAATGGCAAGGTTTGGGTCTTACCGCCATAACTTGCTAAGAATCCCATTCCCCACAGTGTTTCTATAATAACATACAAGGCCAGGCCCATGGAGAAAGACAGCACTTACGTGGTGGAAACTGTTTTGCTTGGCAAAGAAAAGACTCTGCACATTCTGCTTTTTAGATATCATGTTTTTAAAAAACGAAGTTTGCATCTGTGAGAACAGAACAGAATAACACATTAAATAGACACAATTAAACCTTAATTATAAAATGGTAAACAGTGAAGTCTTTTAAAGTCTGGCAACTGAGAATAAACAAAAGAACTGCAGACAAAAAACAGAACTTGACATCATGAGGCATGAGCTCATTTCATACAGCTTATGTGTACATAATCCTATTCAGACAGCTGGGACTGCCTTCTATATAGAATTTTGACAAATGCTGGAATTTTGGCTTCAGTTTTAACTAAAGTTACATCTGATTAATGTGATAAAATTAATTTTTTAAAACCCACTTTTTCCTCACAAG|GTAAAAGGATTCTTCAGCTCTTTGAAAGAAAATGGTTCTCAGCTCCGTTGTGTCCAACAGACAATTGAAACCATTGAAGAAAACATCGGTTGGATGGATAAGAATTTTGATAAAATCAGAGTGTGGCTGCAAAGTGAAAAGCTTGAAC|GTATGTAAAAATTCCTCCCTTGCCAGGTTCCTGTTATCTCTAATCACCAACATTTTGTTGAGTGTATTTTCAAACTAGAGATGGCTGTTTTGGCTCCAACTGGAGATACTTTTTTCCCTTCAACTCATTTTTTGACTATCCCTGTGAAAAGAATAGCTGTTAGTTTTTCATGAATGGGCTATCGCTACCATGTGTTTTGTTCATCACAGGTGTTGCCCTGCAACGTAAACCCAAGTGTTGGGTTCCCTGCCACAGAAGAATAAAGTACCTTATTCTTCTCATTTTATAGTTTATGCTTAAGCACCCGTGTCCAAAACCCTGTACCCCATGTTTATCATTCATAAACTGTTTCATCAGTCTCCTCGAAAGACTCTGAATAGTCGACTACTGAACAATGAACACCTGGATCTGAGACTAAGCCGGACGATGACTGGGTTAAAGCTCTCCCGGCTCACCCCTCCAGACCCGCTGCCCATCCCTCTTCCTTGCTCCATGCCCAGGGGCTGACTTGTAAAGGCCAAGTCATCAAGCTTTCTTGCCCTTTGGATGTTGGTCAGTGGGGAGCCGGAGAGCTGGAGCTGGGGTCGGAGGAGGTAGTAGGTGGAGGTGTTCTTCCCTGATTCCCTTGCGGGATGCCTCGGGCTGGCCTCCCCTGAGGGTCTTAGCTCCGAGAGGGGACCCTCTTTTCCACACAGCCTTCTCCACCTCTGGATTTTGGTAACTGCTCCCTCCTCATCCCTTCAGGATTAGTGGCCTCAGTGGGAGTCTGGCTTTTACTAGTCCTGGCGGACTTGTGGTTTCTACATAATGTGCTCGCACTTTTGCAAAAAATCTTTTTATAGAACCCTCCTCAGATAATTCTGAGTGAGTGTCATCTATTTCCCTGACTGGTACAGTATCTCTTCTGAAAAAGCAGAGTGCATTCAAGTCTGTAGGAAAACCCTTTTCTTAGGGAGGTGATTTTTTTTCTCTCTCTGCTTCTTATTTGGCCTACTTTACAATTTCTAACTAACTAGTTATTGGCATTTACTGACAGTAAATTATTGCAGTCACCAATAAATGATAGTACATTGTGAAACAAAATATTTGCTCATATTAGCAAATAGGACATTCTTTGGCTTTGAAGTCTTTCTTTCTTTTGTGAAGACTTCACACACGGTTGCTTCAGCACACAGTTGCTGCTCAGGTTTTATGTATAGATGATAATAATAGAAAGCACAGTTTACTAACATGGTAAACCAACGGAGTTCAAGTCAAGTCAGTTAATACCCTAAGAATTAGATTTTATTTCTTATTCTGAAAACTTGCTACACAGGGACTTATCTAACCCATAGTGTGCTCTGTTGCTGACTTGATTCAAGTTGCAGCGTGTTTTGCGCTGACTCTAAGGTGCGGAAATCCTCACACCTGGCAAAGGAGAATTCAAACTGAACTTTTTGAATATAAGGCAAAAACTTCAAGATAAGGGAATATGATTGATGATTGGTACGAAAAATGTCAAAATGTGTTCCCCTAATACACGACAAAATAGAGTGACTTCTGGACATAAATCTGCCATTTATTAAACCATTCACTACAACAAATAAATAGGTATAAAAGTGGAATTGGAATTTTTATACTTATTTGTTGTAGTGAATGGTTTAATAAAAATAGAAATCACTGGTAATTTCCACCCCAAACTAAACTATTTCCCTTCTTTTAAAAAAATACACAACCAAGATTTTAATGTAAAATATTTTGCTTTAATTGTATTTTATGCCTTGATTAATGAAACATGGAAATATTGATTTTCAGTTTTGGTCACCTGAGGAACCTATCTTTGTTTGCTTTTGGAAAAGCCCATTTTCTAAACAGATACAATATTGCCACAACAATGTGCAGAAACCTTTTTGATAATAAAAAATTGTTCTTTGCCTCTAAGTGGATATTTGCAATTATTTTCTCTCTCCTAACTAGACTGTAAAAAGGGCTGCTTTAGATCCTGTAGCTTACTCCAGTTATTAGTTATTAACAAACACCCAAGTCTCGAAGATATTTCTAATTAAAAAAGAAGGCATATTCAGAGTTCTTTTTAAATAAATGTTGTTTACTTTTATAGGCATCTTTAAACTTCTGGATTTTGGTATGCCATTTAAAAATACTTCCAGATACACATGGAAATTAGTAATACTGAAGCCGTATCCTTGCAAACACATCTGTCAGTGTCAAAGGTTTCAAGGTTTTTCTTAAAAAAAGAAAACAAAAAAGCAAACACCTATACTGCCCAAATGGGAGGATTAGATACATGGTTAGAAATCCCTCAGGAAAAGTGTTTTTTCTTTTCTTGTTGCTGCCTTAAAAATAGAATAATGACTATTTCTGATGGATAGAGACATAGCATTTTAAGCTGGTGGTGTGTAAAATCCCATAGGTATGTGCATGACTTTCAGAGAGTATTTGGGGGGGAGGTTAACAAGATGTGGTGCCATTTATAAGCAGTGTTATTGTTTTTGCTTGCCCCGCTGCCACAAGTCAGCTAAGTCATAACAAAAGCTTCAAACTGATGCTAAGGAAGGCCATGCCCTTTGGAAACAATAAATTCCCAATCTGTTTTATGTTATGTACCTGACATCTTTTCCTGCATTCTCTACCAGGAAATAAAGATGAAATTAAATATCAAAATTCTAATCGATGATATCAGTGCAATGTTCAGGAACTATTCATTAAGATATTAGAAAACCATTCAAAGTGGTAGGACATCAGAGCCTACTTCTTACATTGCTGTGGGAGAAATGCAGGTTTCAAATTTAATATATATATATAATTTTTTTAAAAGCAGAAGTTTCTTTTTATATTTGGTAAACTTAAGTTCCATAAAGCCAGACGCTATACAGTGCAAAGGCTAATGTGGCAATAGCTCTAAAGACACAGTTGCTGCTCAGGTTTTATGTATAGATAATAGAAAGCACAGTTTACTAACACAGTAAACCAACAGAGTTCAAGTCAGATGAGTACCCTAAGAATTAATTAGATTTTATTTCTTATGCTCGAAATTTGCTACACAAGGACTTATCTAACCTTTATTTTGCTCTGTTGCTGACTTGATTCAAGTCTCAGTGTGTTCTGTGCTGACTGTAAGATGCAGAAGTCCTCACACCTGGCAAAGAAGAGTTCAAACTGAAAAGGGGTTTGGTGCTTCCTGGTTTGTCCAGGTTACCTGTTATTAATTTATTATTAGCAGCCCAAGAGGAGATATGTGCCCAATGTACAATATCTTATGTTTGACTTATAAACATTATCCCAAAGCAACATCAAATACAGTTCAAAAGCCCAAGAGGAAAGGGGGTAATAAGAAATCAGAACACTGAAGAATGTTTAAAACATTGTTTTCTAAACACTAACAAAAAAAATTAAGGGCAAACTGAAAATACAAATGAGATTTACAGGCACTGTGTGTAGAATGTGCAAAAATTCACTTAGCTTTTCTTTTGTTTTTTTGGTGTTGCTTTAAGAAACTTTATCAAATATATTTCTTACAAATATAAAGCTTTCTCTCCCAATTGAAGGCAATTAAAAAAATTCAAAGTTTATCAATACTCAGTACACAGGTGAACCAGTCAAATTCATTTTCTTTCTGGAAAAGAATAACAAACCAATATTTAGGATGTTCAGAGACTCAACAAAAACCATTCTAGAAATCACCCAGAACAATTGTTTTCTGTTGCCAAAGCCTTTTGTTCTTCAAAAGTCACCATCCACCAGCTGAAGATTTTACATGCAGATACCTTAAAAATTTCAAATAAAAAATGCAGTGAATCATTTAATATGTAATTTTCTTGTTACAGACATAGTAAATATACCACTTAGCAAAAGCATTGTATAACAGACAGAAGGAATTTCCTATAAGTAAACATGAAAGTGGATTAATAGAATTTTTTTTAATTTTGGGAAAAATGTTAGAGCAGTTCTACCTAATATAGCTCCTTTTTTCCTAGGAAATAAACATGGATCATGGTGAGAGAGCTGAACCCGATTTAACCTATACTTTGATTTCTTTTAGGCTTTGGTCAGTAAGTGCTTGTATGCTTTTAAGGCTTACATTAAGCCCTCTCCTTTCTGAAGATTAAGATAAGGGCCCAGTTCTGAAGATCTCAGAAATCCCTTCAATCTGTCCAGTTTCTTCAGCAATTATAAATTAGAATCAAACACAGTACTTTACTTTCCAAAATAATGACAAATAAAAATGGCCAATCTTTTCCTTTGCCCTTGTTCCCAAAACCCTGTAATTTCCACCAGACTCTAAAGGGTACTTCTCCCCACCCCACCATGTATTGGTATTCTGGGGAATGCCAATGTCTTCAGGACATTTCGCTAAGTTGTAACATGCTAATTTTGCTTTGCCACAATAACCTGCAGTAATGAGCATTTGGATTTCAAAGATTCAACTAGCCTCAGATGGTCATTCTAAGTGCCTGGCCTAATATTTTTAAAAGCTTTTAATTATAAGATTAAGGTTTTTAAACATTTCAATAGCAAACTCATCCCATTTAGTGCTTTCAGAGAATGACCAATTACTCTGTAGATCTTGCAGTATGCATTTCATGCCAATACTGTAAAGTGAGCATGAATTACTCAAGAGGTGGACTTCACTTCTCTCATCTATAACACATAAATTGGCAGAAGATACAGTTGTCTTCATTTACAAATAAACACCCAACTTACCAGATACCTTAACTTGTATTTCTTTAGTCATCTTTTGGCTTGGAAGTTTCCTCTGTTGTCTAAAAGGGAAAGCAAAACCATCCGTGAGCTTTCTTTTCTGTATTAAGTATGAGGAGATGGCCTTCTCAGAATTAGGGGACAAAAGATGGCAGTCAGTGGGGAAAAATAAGATGGTCCTTTCAGTTTCTCTTCTTCATCTGGCCACAATATTGTGAGTTCTTCCTCCTCAAGCTTATAAGCTAAAAATAACCTTAAGTGATCCTGATCCTAAATGTATCACTCTCAGCTTTATTTTTTTAAGGCTAGGGTAGGTTATAAAATATGCATAGGTGTTCTTAAATGGCAATGTTTCATTCTGTGGTGATCCTTCCTTTCCTGTACATAGGGTCATGGCTGTCCAGTAAATCCACTCATCATAAAGGGTTATTATGCTTTCTATTATTTGTTAAAGGGCTGATTAAGTACTTCGTATAACTGAAATTAAATAATAGGCAAATAAGTATTTAAACTGAGATATAATGGCATAACTGCCAATTATCAATGCTGTTTCCTATATCCCTTAAGGGAATCTACTACAGGAAGATTTTAATATATTGTTTTAAAAGCTTTGGTGTAACTGGACTATTGTCTTTAAAGCTACACCTTTAACTCCTCCTTATAGCCAGGGGATCATAGTAATAATCATTTAAATCATATGTTCTTGGAATTGGAAGGGACCTAGAAGTTGTCTAATCCAATTTTCATTTCTCATCACAGCTTGAGTAATTTTAATAATAGGAAGTTTCCAACTTCCATGTTCTAAAATCTATATAAACCACATACTATGGTGGTATTTTAATTAGGGCAAGAAAGACAGGCAAAACACAGGCAAATTGGGTTGTGTCAAGAACATTCATTTAGGATTTTAAAGCACCAGTTACTTAAAAATATATGTATTTATACAAATTCAATTACTTTACCAAGCGATGCTATGGGATAGATAGTACTTTAAGAAATTTTATTCTAAATAGATTCCAGTAGGAAACTAAATGACTGAAATGATAAACCCTACTCTGTGTAACTGCTAAACTAATTTGTAGTATATTTACTGCTCCATTTACCTTTGCTGAATCCTTCGCTTTACCTCCATTCTTAGGTGCTTTGGAGCTGGAAGCAGCCTTCTTGCACTTATCCTAAAGCAAAGGAAATGTAATGAGGCTATGGCTATCACCAATCCATCTAGCAATTAACTAGGCTGCAGTTAAATTAATCCAGACCATTCTGAGATCTCCAATTTAATTAAATAATGGAAGACTTTGGGTGTTTTTTCATGATTTTTTTTGAACAACGGTGTCAGAGAATTGTTTAAAGCAGGAGGGAAAAAGGATAGAAATAAGGAGGGAAATGTGGGTGACATTGATGCTCTAATTCCCGTGGTGCCTGACTCACCCAGACCTTATTTTGCTAATCAAAACAGAGCTTGTCAATAGATAATAAATGTCGGCAAGGGTGTGGAGAAAAGGGAACCCTAGTACACTGTCAGTGGGGATGTAGATTGGTATGGGCTATGATGGAAAACAGTATATAGGTTCCTAAAGAAATTAAAAGTAGAACTACTGTATGACCCAGTAATCCCTCTTCTGGGTATATACCCAAAGGAGATGAAATTATCACCTTATAAAGATATCTGCACTCCCATATTCACTGCAACATTACTCACAATAGCCAAGATATGGAACAACCTAGTTGTTGATGAATGGATAAAGAAAATGTCATGTATATATACATAATGGAATATTATTCAGCCTTAAAAAACGATATCCTACTATTTGTCACAACATGGATGGACCTGGAAGACCTTATACTAGATGATATAAGCCAGACACAGAAAGAAAAGTGATTTCACTTATATGTAGAATATATATAAAAGAAAAAGCTCAAAAACACAGAGAATAAAACATGGCGACCATGGTAGGGAACAGGAGGAGGAAACAGAGATATAGGTCAAAGGATACAAAATAGCAGATATGCAGAATGAACAAGTGTAGAGAGTTAATGTATAACATGAGGACTAAGGTTAATAAAATTGTATTAGGGATTTTGTTAACTAAGTAGATTTTAGCTGCTTTTGTCACAAAAAGTAGTTGTGTGAGAATGATAGATATGTAAATCTGCTTCCCTACAGTAACCATTTTATTATTTCTATGCATCCCAAACTACCATGTTGTAAACCTCAAATATACACAATAAAATGTATTTAAAAAACAAAATAGAGCTTGTCTCGATCAGGACTGGCTTTTGTGTACCAAAAGGCAAAAAAAAAAAAACAAAAAAAAACCCTGTTTTCAGTGTTATGGGAGAGAAATGAACAATGGGAAACAACCGAGGAAAGCTGGAGCAGGTTACGTATAAAAATAAAGTCCATTCACCAAAAAAGGCATTACTTACGAGTTACCAGGGGTGAGAGATAGGATGCTGAAGTGGTCTAGAAATTAAGCTACCCAGTATGGAAGGGCTGACAATTCAGTGATCGAGAGCAGTGCCTTAGAACAGCCAAAACAATAGCAAACTGAGATCTGCAGAATTAACTCTCCTGAAAATAACAAGGAGGTACTCATTTCACGTTTCCTTCTATTTGATTTACAAGAGGGTGTAGCTTGAGGGAAAATGCCTCACACTTGTTGAATTACACAGTTGTTTCTCATTCACTTTTAATCACGTTTTGAGCACCTGCTAAGTACCAGGCATTTTGCTAATGAGGAGCACAGAGGTAAAAGACACATCACTACTGTATGAAATGCGTAGCTCAGTGGTGTGATACACAAGCACAGAGAGGTAACAGAGAGCAAGGAGGGCATGGAAGAGAGGCCTCTAACTTTGGACTGGGAAGGGAGAAGATGTAAGACAAGAAAGTCTTCCCTAAGGAGCTGATGCTTGAGCTGTGCCCTGAGGAATGAAGAGTAGACCAGTTGGGCTAAGCAGACAGAAAAGGGGAGGAAGCTCCAGAGAGCAAATGAGCATGAGAGTGCCTGATGTAGTTAGGGCCTGCTCTCACTTTAAATGAACACAGACATAGCATTATTGTGGCACAACCATATAGTGTGGAGATAAAAAATGGTGGCTATGGAAATTACAAAGTAGCAGTTAAGAAATAACGTTAAGCAGTGTTTTATAAGTGGACTGTAAGTATAATTATGTAAAATATACATATAGAAAAAAAAGGAAATCCACAAAATAATACTGTTTTGGGGGCAGTGGAATTATAGGCATTTTTTCTTTTCTTCATTTTCAGGTTCTCTATATCATCGTTTGATTCATTCTACAGTTTAAAAATTGTAAGGGCCAGGCGCAGTGGCTCAAGCCTGTAATCCCAGTACTTTGGGAGGTAGAGGTTGGCAGATCACTTGAGCCCAAGAGCTCGAGACCAGCCTGGCAACATGGCGAAACCCCCTCTCTACAAAAAAATACAAAAATTAGCTGGGTGTGGTGGTGCACGCCTGTAGTCCCAGCTACTCAGGAAGCTAAGGTAGGAGGATTGCTTGAGCCCAGGAAGCAGAGATCGCAGTAAGCTGAGATCACACCACTGCACTCCAGCCTGGGCTATAGAGTAAGACCCTGTTACAAGACAGACCGATAGATAGATCAATCAATAAATAAAACTTATATGTATGTACACATACACACACACACATTTCAAAGAGTGAAATGTGAAAAAGCACAGTACCTTTGCTGTGTTCTGTGAGGTTTCTGTAGTGGAGGGACAGCTGTCCAGATCTCCTGAGAGAGCATCAATGGGGTCTTGGTCATCTGCAGGTTTCTGAGATATGAGTAGAAATAACCATCAGTGAAGAAGCAGAAGGCAAAATGCAATATGGGGTCTTTTCCCACATCACTTACAAATAAAAGATGTTTCTATAAGAAAAAAACTGACTGACATTCTTTATTAATAATAATGTATTGTAAAGGAGATAGAAAAACAAGAAAATCTTGATGGCTTTTTTTCCATCTACTCTTTAATACACGTTGCTTAGCATTCTCTGAGCCTCAGTTTTCCACCCTAAAGGGCTTTTGTGAAGACTAAAAGAGAGGTAAACAATAGTCACACACATTTATATGCATGCTTTGACAAAGTACCAGGCACAGAGTAGGCATTCAATATGTTTTAGTTTTCTAAAATGCCAAATACCCCTATGGCTAGAATAAAACAAAATTTAATGGAAATATGTTCCTATGGTCTTTACCTTTGAATCCTCTGATTTCTTTGTAGGTGGCTTCACTGGTTTGTCCTTAAAAAGAAGGCAGACTATTGGTTAAGCATAGATATCTGTAAAGGTTTACTTAGGTTTAGGCAGTAGAGAATCTATTGTCCCATGACTTGACTTGGATAAAATGGAGTGTAGACTTGCAATAACTAATAAACCTGAGTCCCCACTATTTCTTTACCAGCTTTGCTTAATTACTATTCTTTATAATCATTTGTGTAGACTTGGGGGAAACATATGAAAGGGTCTGGTCCTGGAAGTGAAAGAAACACTATTTGTGAACCTTCTGCCATCATGTATTATCACTTATAATTCTACTTAAAGTGTTATAAAAAGTTACTTGTGTTTTTACATCTTATTTACTATAGCTTATTACACTTAGAAAGTCATCAGGGATTTTTCCCAGCTTCAAAGGCAAGGGCCTTAAATAATAAATTTCCCAAAGAACGCAAGCAGGGTGAGTTGGTACTATCAAAGTGGGAAGGGCTCTAGCAGGATGTGGAATTGCTATCTTGAGGAATACTGAATGCAAGCAAGGAGAATTGGTTCCTGTAATAGGAGACCCTGAAGCTGACACTGCTATTTAAACCAGGAATCATCTTACTCACCAAAATGAATCAGTACAGAAAGAGAAGGGTGTAATTTGGTCTTCCTTGTTTCTTATTTGATGAACAGGCATGGAAAGGGTTATTTATCATCTAATTGACTCAAAAATTAGAAACTAAATAGGAAAGAGGTGGGGGACAGGCCTGGTTGACCTACGAAAGACTGGCCTCTATCATGTGGGAGACAAAAGGCCAGGGACTTTTTGGCAGAGAAACAGGATTTGTGATTGGGAATATTGCTTTGCCTGTCTTCACTTGCAATAGTGCTGATGATGATGCAGGAGAAGATAGGGAGACCCCAGGTCTTGGAGCTGCCTTATTAATTTTCCCTATTAATTATCCCAACACCAGCTCCTTTTCTGCTCTCCTTAAACAGAGATTGCTTCCTTTGAAATCCTATCACCTTAGTCATTTTTATAGCCTTTTCTTGAAAGAGAGAATCACATTTCTTGGATCACTCTTAAGCATCTGTGAGTGACTGAACAGTATTCAACCCTGTTTATACCACAGAGCCTAGCATATGCATATATAATGATATTGGCTCAAATAAATATTTACCTGCTGGTCGGCTTTGGTGACACATTAGAAGCAGTCAGTCATGGTATGTTTTATGCTATTGTAAAGGAGTATAGCTACTATTTATTTCAAGGGAGGTTTGAATTTAAAGATCTTGGTAGCATAAATCCGATCTAGCAATTTGCCTCAGTTTACCTGTCCATTATCATCCAGGAGATGTCTGTATTCAGGTGGGATAGTGTCATCTCTTTCTCCAAGCTTGTCTCTATGTTCAGCTTTAGCTTTTTCCTATATCAACAGTGAGCAGATAGAATTAATATTCATTTCCTCTTTCACTTAGAAAATACATTGTCAAATGCAGCATTCAGCATTTGTTGTACTTTCATGAGGCAAAACATATGGTCTGTTTTTATTTTTTAAGCAACAGAACAACACAGATGACTTCAACAGACTGCTAGTTGGCTAAAATATAAAATCCCGTATGCTTCTGTATGTCAATTCATATCTGTGAATTTTCTAGCTATATTTTAAATGGAAATAAATGATTAAATAATTATCTTCAGAAACCATGTTAGGAGATTAGAACCCAAAGGTATAAAATATCTTTTTCTTTTTTTCTGTATGGTTTCACTTTTCTAATACAAAATCAGGCCACTGTACCTTGCCATTTAGAGAGGTCACATTTACAACTTTGCCTATTTATAAGCAACCCTGAAGGACAACAGCTAATTTGAATGGATGTGCTGACTGCTTGTGTTGCATGGGAGGAAGCCATGCATACCCCACACCTACCTCCCAGAATCCCCTCAGGGAGGCTCCGCTGGTATCTCTGTCACAGATTCAGTGACCTTCCCCTCCAGTGGAGTACAGATTGATTTTTCTATTTCATTATAATTTCATGTTTAAAATATAGGTTAAGTTCACATCAATATTCCTATGACAATGACAGAGTCAAGACCCAAGGATTAAAATTTCACTATTAGTGAATTTTTTTTTTTTTTTTTTTTTTTTTTACCTTTACTTTATCTTCCATTGGTTTGTTCTCATCTGGGTCAGGCTGCCTTTGTCCTAGACTGTCAGAGAGTTTATCCAAGGCATCATCGAGGTCTTTGTCACTCTGCTGAAAAGTAAATAATGCTGAATTAGTCACTCATTAGCCAAACTGTATCAGAGGCAAATTAGCCCAGGAACTCCATCTTTTCAGGAGGGAACCTGTTTCCTTGGAAGAAAAAGACATCTGGGGCTGGGGCAGGGGAGTAGACAGGGTCAGAGAAGAGAAGCCTAGGAATGGAAGATCAGGAGAAGAGCGGCAGTATCTGTCACCCTACTGGGGTTGGAGGGCCAGCACCTTCCACCCAACCCTGCCCATCTCTTGGTGAAGATCCCACCAGGTTAAGGAGGTCTTAGTGGCAGCCTCAGGAGCCATATCCAGTGGGTGACCTGGAGGTCACATAAAGGGTCCAAAAGCAAATGAACCAATCATGTGTGCCTTTCATTTAGAAATTAAACACCATTAGAAAACTGGATATGAAAACAAACATCTACTAATGTTGTCAGATGGTTAGGAAGCAAGATTCTGCAACTATAGAGGGTAAGTGTTTCTTTGGTTCTGTGGGTCCTCTCTAAAACTCTAAGATCTTGAGGGGTGCATTTCAGAAAGTGCAGCGTGACCCGCAGTTTGTGGGAAGCCATGGAGCTCGGCACTGCCATCCTAATACTTCCTAAAGCACAAAACCCCAGAGACAATCTGGGGTCAGGAGAGTGGAAGGGGCTTGTCTGCCACACTGGTGATGAGTGCCCTGAAAGACTTCAGAGAATTTCTGAACTGGTGGGGAAACCTCTCTTTTCATCTTCAGGAAGCTCATGGAAGTGAAATTGCAGAAATGGGAGCTGGTATTCTAGAGGAAAAAAATTATGGACAACAATATCACTGTAACTAAGATAGCTTATTTCCTCTAACATTTATTTACTGTATGATTCAGGCAGCTTATTTAACCCCTTTCAGCTTCAGTTTCCTTGGCCGTGAAATGTGAATAATAGTAGTACTTATACTCCTAAGTTGCTGAGAAAAGTAAATGATTGAAAAGGCATTTAAAACAATACTAGTTGTATGTTAGGACCCAACAAATGGTAAATTATTATTAGTATTATTATAGCAAAATCCATATTTTTCAACACATTGCATTCAAAATTCCACCTCTAAATGAATTCAATTAAAATGTGTTTAATATCTACATTGTATAAGACACCATGCTGAACTCTGTCCAATACTGTATAGAACTTTCCAGTTGATTTTCAAAATGTTTTCACATACACTATCCAGTTTTATTTGATGCCCACAATGGTTCTCAGTGAACTAAGCAGGCTTTTTTTTTTTGAGGTAAAAAGCTCAGGGAAGCTAAGTCAGTTGCTTGAAAACAAATTGTTAAAAAGTGAATAGAACCCAGGTCTTGGGACTGATAAAGCTTTTCCCTGTCATGCTTAGTCACATCCATGATCTTCTATTTTCTTTGAAGCAGTTTTCCTGTTGGAGTGATTTTATTACACAGATCTTTGAAATCATGTCTTCAAATGCTTTCAGTGTATGTAACACTGTTAGTAACAATCTAATAATCACAGCAAAGAAAGCTCCCGTGAATTATCATGGTTTATTTGACTCTTCGATTTCCTAATATTTTTATCTAAATAAAGCTTTATACCTTGTTTTAGTGACATCTTCAAATAAAATGTTAACTAAAAACAAGCCTCTCTGATGGGAAATGTGATCAGAGAAGTGTCATTGTAAAACCTACTTCTTAAAGGCAAAAAAGTTTTTGATTGCAAATGTTTACTGATAGCCTTCATCAGGGCAGAATCTCTGGCCTGAATATTAAGAACTGAAGTGTAAACGGCAGCCTAGGCTATTAATGATTCTTCCTTTCTGTTGCATGGGGACTTTCTTCATTGTGGGTGTGTTTACATACACACATGCACATGCACATGCACATACACACACACGGGGCATTTTACTGGTTTTAAGTGCTTTATTATAATCCAGGATTATAGCTGCTAATGGTAGAGCTGCCCGGGGCCAGGTCTGGGCTTTGTCATTTGTGCCTCTGGATATTTTCAG|ATGATCCTGAAGCTGACGCAACAGGATGAAAATCCATCAGAATCTCAGACTACAGCACTAAATATGCTTTGATGCTACATCAAACGGAATGGAAGCATAGCTGACTTCGCTAAAGTTACTTCATCTCCATCTAGCAAATGAGGCACTGTTCTCAACCAAAGGAGATGGGGATCTGGTTTAGGGCAATCCCTTTATAATTTGATGTGCTGTGGTCTCCTTGGTAATGTATAATTTGGTATTGCACAGGTGATTAGTCAAGGAAGTCTGGAAAAGCTTTGGTCCCACAGCCTTGCCTCACAGCATGTAAATAATTAAAACAATATTGATGCTGAGGTTCTTCTACTGCTAGTATGAAAGTGACAAATTTTTACTGGTGTGAATTGGGAAGAAAACAATGCTATTCCATGACGTTTGTAAAATGTTTGTAAAAGCTCAAACATGACGATTCCATAAAATAAACTTGAGGTTAAATAATGGGTAGTAAATTATAGAATGTAT|AAGAAAAAATATAAAGGAGAAAATCAATTATCAGGAAAGCTAAAGAACTTTTCAAATCTAGTAATTTGAATATAGACACAATGCACTTTATTGCACTTTCAATTCTTATAAAGCAACAATAATATTAAGGTCCTTGACTATGTGTACAATGTTTTCACATATATAGTTTCATTTAATCATTTCAAAGTTAATCTCTGCCATCTCGCTAAATCATCAGTCTCGGCTCTTCTGAAATAGAAGGTGCCTGATCTTCCTAATAATTCTGCCTATTTTCATTTGCTTTAAACAGGCGCCCTATTTTCTTTCTAGTTGTGGCTGCGCAAAAACATTTATCTCCCAAATAAGATGTGCTGCTTACCGAGGTATCACGGGGTGGGGCTCCAGCTTGGGTCGTTGAAGCTGGGGTTTGGGAAACCACTTCAGAGATGGCAGCAGCAAGTTTAGCATCTTCAAATTTCTTTTATTGAAAAAAATTTTATTAGTAACATGTTGTATATAAAATTATGAGCACAATGCCATCACTTAACTATAACTCTTAAAGATAGCTTAATGACTGTTTATTCTCTTGACCAAATAGACTCATAATAACATATAATTTTAAAAGAAATTTAAATTCTTTCTTCTCTATTGTATTATTTTATACAATTTGCTATTTCTATTTCCTTCTCATATTGATTATTCTAAATACTATGCAATAATATAACTTAGAGTTCCACGGTTTGTTTACACATTTCCTGTTGTACATTTAGGTTATTCAAAGTTTTCAGCTCTTTTAAAATTGCTCTGAATAAGTTCTAGTGAGTGAGTTATGGTGCTGGCTATATTTTGCTAAACTGCCCTCTCAAATGTTGCTAGGAATTCATACTGCGAAAAGCAATGAATAAGCATGCCTGTTTTCCCATGGCCTTGCTTGCCAGAATTTGACTTTTATTATGATAATCAGTGTAAAATGATATACTACTATTGCTTGTATATTGTGGTATACGGTGTCAGGTTTCAGGGTTTTTTTTCAACGTTAAATATTCTAGAAACTTTCTGAAATAATTTCTGTTTAAAAATATTGAATATTTGCTTCATTTCAAATACTCCCTTTTGACAAAAAAACTTAGGTATAACTGTTGATGAAAAACCAGAAAAAAGTCCAGAACTCTTTGGTGACTCCAACTATGGATAGCTTATTTTGAAAAAGGAGAATTGCAAATTTTACCAAAAGATGGAGAAAAGCACATTAAAAAGATACCAACATTCAGAAATTCATTTCAGCATGTTATTATTGGAAATTATTTAAACTAATTTAGATAACTATAAGATACTTATTGTCCATTTATACCCTGTAAAGCCGTTTTAGAATGTAATATTTTAGGTAATCCAAAATGTACTAAATTAAATTCATTTTTAGTTATGAGAAATCTTTGCTTATATGACAAATGAAAAGAATAACAAGTTGTCAAATGAAAAGAATGACATTGAAACATTTGTATTGTCTCTTCTTAAACTATCTTATTGACTTATTATTTAAGCCTTTTAATACTAAGTATGAAACAACCTATGGTCTGGAAATTTGTATCGCAAAGCTATATGTGCATATGTTATTTAATTCATCTAATGCTACACAAAAGCATAAAATAATGATTTTTCACTCTCTTTAAAAATACTAAATCATTTATGTCCATTTCTCAATTTTTTCATTGATCTATGCTTTGAGTTTGCTTTCTCAACATTATTGTATTTTCCACTTATTATTACTGTATAACATATGCTAGTGTTTAGTTGGATTAATCTTACCTAAAAGTACTGAAAAATGCTTTTTAGTACTTTTTCATATTTTATACATTTATTTTCCGAATGTATCATTGAATAATTTTATTGAGTTATAAAAGTATCTTATTGCTATTTAATAAAAAATTAACACATAAAATGACTTGAATTGTCATCATTCTTTTTAAGATATTTAGTTAAACTGACTTAATGTATGGCCTTCAATTTTTTTGTGTCCTTATTTTTCTGATCATTTCTCCTTTTATAGTTTACATTAAGTCTGATCTCATATTAATTACATTTTCTCATCTGTTGTTACTAATAAACATGGCATAATGTTACTTACAAATGTATTATCTACAAGTAGTGCTATCCACAAATATATTCAAATGTTCCCTTTTAATGTTTGTCATTTTTTTCATGTGTTGTTAATGATTCTTCCATGTG'

In [6]:
offset = -len(erap_reference.split('|')[0])
offset

-691

In [7]:
def find_occurrences(s, ch):
    return [i for i, letter in enumerate(s) if letter == ch]

def replace_char(s, i, c):
    return s[:i] + c + s[i+1:]

find_occurrences('AATAT', 'T')

[2, 4]

In [8]:
sequences_unique = {row['Sequence'].strip() for row in rows}
sequences2i = {sequence: str(i) for i, sequence in enumerate(sequences_unique)}

seqs_to_align = [
    str_to_fasta_seqrecord(
        str(i),
        sequence 
    )
    for sequence, i
    in sequences2i.items()
]



In [9]:
from tgsts.sequtils.fastio import write_fasta
from tgsts.sequtils import str_to_fasta_seqrecord
from tgsts.sequtils import clean_sequence

write_fasta(
    'erap_seqs.fasta',
    seqs_to_align,
    force=True
)

write_fasta(
    'erap_reference.fasta',
    str_to_fasta_seqrecord('ref', clean_sequence(erap_reference)),
    force=True
)



In [10]:
#Run minimap
import subprocess

subprocess.run(
    [
        "minimap2",
        "-a",
        "erap_reference.fasta",
        "erap_seqs.fasta"
    ],
    stdout=open("erap_frags_aligned.sam", "w"),
    check=True
)


[M::mm_idx_gen::0.003*1.87] collected minimizers
[M::mm_idx_gen::0.005*2.24] sorted minimizers
[M::main::0.005*2.23] loaded/built the index for 1 target sequence(s)
[M::mm_mapopt_update::0.005*2.13] mid_occ = 43
[M::mm_idx_stat] kmer size: 15; skip: 10; is_hpc: 0; #seq: 1
[M::mm_idx_stat::0.006*2.06] distinct minimizers: 8894 (98.97% are singletons); average occurrences: 1.025; average spacing: 5.278; total length: 48105
[M::worker_pipeline::1.149*2.94] mapped 437 sequences
[M::main] Version: 2.22-r1101
[M::main] CMD: minimap2 -a erap_reference.fasta erap_seqs.fasta
[M::main] Real time: 1.155 sec; CPU: 3.379 sec; Peak RSS: 0.166 GB


CompletedProcess(args=['minimap2', '-a', 'erap_reference.fasta', 'erap_seqs.fasta'], returncode=0)

In [11]:
#Load aligned

from utils import build_full_length_alignments

ref_aln, subread_strings = build_full_length_alignments('erap_frags_aligned.sam', 'erap_reference.fasta')

piped_ref_aln, piped_read_alns_strs = add_pipes(erap_reference, ref_aln, subread_strings)



In [12]:
class Alignment:
    def __init__(
        self,
        ref: str,
        ali: str
    ):
        self.ref = ref
        self.ali = ali

        self.rrs = identify_rrs_ali(ali)
        self.seq_rr_masked = mask_rrs(clean_sequence(ali))




In [13]:
#Generate alignments
def generate_alignment(key, ali):
    return Alignment(piped_ref_aln, ali)

args = [
    (k, ali)
    for k, ali
    in piped_read_alns_strs.items()
]

res = run_concurrently(
    generate_alignment,
    args,
    mode='process',
    batch_size=10
)




Processing: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 437/437 [00:59<00:00,  7.37it/s]


In [14]:
assert not res.failed

In [15]:

piped_read_alns = {
    r.args[0]: r.result
    for r
    in res.successful
}

In [16]:
next(iter(piped_read_alns.keys()))

'0'

In [17]:
def get_erap_exonic(gen_seq: str) -> str:
    ea = exonic_alignment(
        str_to_fasta_seqrecord('cons', gen_seq),
        'ERAP1',
        'erap_full'
    )
    return ExonDict(*next(iter(ea.items()))).seq

def calculate_mms(seq: str, mode: str = 'gen') -> str:
    seq = str_to_fasta_seqrecord('cons', clean_sequence(seq))
    locus_lib = ANTypingLibs().get_locus_lib('ERAP1')
    locus_lib.load_db()

    if mode == 'gen':
        mms, _ = mm_profile_from_seqs(
            seq,
            locus_lib.get_gen_ref_seq(), 
            locus_lib.get_gen_ref_seq(), 
            locus_lib,
            'gen'
        )
    elif mode == 'cds':
        mms, _ = mm_profile_from_seqs(
            seq,
            locus_lib.get_cds_ref_seq(),
            locus_lib.get_cds_ref_seq(),
            locus_lib,
            'cds'
        )
        
    else:
        raise ValueError('mode must be gen or cds')
    mms = ', '.join([str(mm) for mm in mms])
    return mms
        
    
def get_erap_cds(gen_str: str) -> str:
    annotator = Annotator('gen')
    res = annotator('ace78ee52a044de361c280b47d9e8', gen_str, fast_alignment=True)
    return res['sequence']['coding']

    

In [18]:
class OverlapError(Exception):
    def __init__(
        self,
        message,
        sample,
        frags_left,
        frags_right,
        overlaps
    ):
        super().__init__(message)
        self.message = message
        self.sample = sample
        self.frags_left = frags_left
        self.frags_right = frags_right
        self.overlaps = overlaps

        self.write_output()

    def write_output(self):
        out_path = f'./out/fails/{self.sample}.txt'

        if os.path.exists(out_path):
            os.remove(out_path)

        with open(out_path, 'w') as f:
            f.write(self.sample+ '\n')
            f.write(self.message + '\n\n')

        if_left = self.frags_left[0].fragment
        if_right = self.frags_right[0].fragment

        overlap_lines = []
        overlap_lines.append(f'         \tCoding\tNonCod\tRR    \tKmer')
        for (i, j), overlap in self.overlaps.items():
            overlap_lines.append('\t'.join([
                f'F{if_left}:{i}/F{if_right}:{j}',
                *[
                    str(int(_)).ljust(6)
                    for _
                    in overlap.match_tuple
                ]
            ]))

        with open(out_path, 'a') as f:
            f.write('\n')
            f.write('\n'.join(overlap_lines) + '\n\n')
                    

        ali_lines = []
        msa = MSA(*self.frags_left, *self.frags_right)

        #Get overlapping boundary idx
        i_start = max([
            len(re.match(r'[-|]*', ali).group())
            for ali
            in msa.alis 
        ])
        i_end = len(msa.ref) - max([
            len(re.match(r'[-|]*', ali[::-1]).group())
            for ali
            in msa.alis 
        ])

        ref = msa.ref[i_start:i_end]
        alis = [ref] + [ali[i_start:i_end] for ali in msa.alis]

        #Lose all columns which are just -s
        ref, *(alis) = [
            ''.join(ali)
            for ali
            in zip(*[
                bases
                for bases
                in zip(*alis)
                if set(bases) != {'-'}
            ])
        ]
        
        labels = [
            f'F{if_left}:1', 
            f'F{if_left}:2', 
            f'F{if_right}:1', 
            f'F{if_right}:2', 
        ]
        batch_width = 80

        for i in range(0, len(ref), batch_width):
            s = slice(i,i+batch_width)

            ref_snippet = ref[s]
            alis_snippets = [ali[s] for ali in alis]

            label_ljust = 20
            
            mm_line = ''.join([' '] * label_ljust) + ''.join([
                'v' if len(set(bases)) != 1 else ' '
                for bases
                in zip(*alis_snippets)
            ])

            ali_lines.append(mm_line)
            ali_lines.append('ref'.ljust(label_ljust) + ref_snippet)
            ali_lines.extend([
                label.ljust(label_ljust) + ali
                for label, ali
                in zip(labels, alis_snippets)
            ])

            

        with open(out_path, 'a') as f:
            f.write('\n'.join(ali_lines))
        
class AssemblyError(Exception):
    pass

In [19]:
class Fragment:
    def __init__(
        self, 
        seq: str,
        ali: Alignment,
        fragment: int,
        read_count: int,
        fragment_name: str,
        gen_mms: list[str]
    ):
        self.seq = seq
        self.ali = ali
        self.fragment = fragment
        self.read_count = read_count
        self.fragment_name = fragment_name

        try:
            self.mm_pos_dict = {
                mm2pos(mm): mm
                for mm
                in gen_mms
            }
        except:
            print(gen_mms)
            raise
    
    def merge_aln(self, other: 'Fragment'):
        return MSA(self, other)

def mm2pos(mm: str):
    return int(re.search(r':(-?\d+)', mm).group(1))

mm2pos('5utr:-296delACACACACACACAC>')

-296

In [20]:
class Overlap:
    def __init__(
        self,
        frag_left: Fragment,
        frag_right: Fragment,
    ):
        self.frag_left = frag_left
        self.frag_right = frag_right

        self.match_tuple = self.assess_overlap()

    def __bool__(self):
        return self.match_tuple[0] and self.match_tuple[2]

    def __str__(self):
        return str(self.match_tuple)

    def __repr__(self):
        return f'Overlap{self.match_tuple}'

    def assess_overlap(self):
        overlap_aln = self.frag_left.merge_aln(self.frag_right)

        #overlap_aln.print()

        ref_ali = overlap_aln.ref
        alis = overlap_aln.alis

        #Get overlapping boundary idx
        i_start = max([
            len(re.match(r'[-|]*', ali).group())
            for ali
            in alis
        ])
        i_end = len(ref_ali) - max([
            len(re.match(r'[-|]*', ali[::-1]).group())
            for ali
            in alis
        ])

        #Get union of rr bases
        rrs1, rrs2 = overlap_aln.rrs
        rr_bases_1 = {
            i
            for rr
            in rrs1
            for i
            in range(rr['ali_start'], rr['ali_end'])
        }
        rr_bases_2 = {
            i
            for rr
            in rrs2
            for i
            in range(rr['ali_start'], rr['ali_end'])
        }
        rr_bases_both = rr_bases_1 | rr_bases_2

        #Set to false when mismatch found
        coding_match, noncoding_match, rr_match = True, True, True
        
        for i, (ref_base, *(ali_bases)) in enumerate(zip(ref_ali, zip(*alis))):
            if not i_start <= i < i_end:
                continue
            if len(set(*ali_bases)) == 1:
                continue
            #Mismtch found
            #print(i, ref_base, ali_bases, overlap_aln.get_feature(i), end='\t')
            feature = overlap_aln.get_feature(i)
            if feature == 'coding':
                coding_match = False
            elif feature == 'noncoding':
                noncoding_match = False
            else:
                raise ValueError('unexpected feature found?')

        left_tail_masked, right_head_masked = trim_intersection(
            self.frag_left.ali.seq_rr_masked,
            self.frag_right.ali.seq_rr_masked,
        )
        if len({
            str(left_tail_masked.seq),
            str(right_head_masked.seq)
        }) != 1:
            rr_match = False

        kmer_similarity = - calculate_kmer_distance(
            clean_sequence(alis[0][i_start:i_end]),
            clean_sequence(alis[1][i_start:i_end]),
            7
        )

        return coding_match, noncoding_match, rr_match, kmer_similarity

In [23]:
class MSA:
    def __init__(
        self,
        *fragments: Fragment
    ):

        if len({
            f.ali.ref
            for f
            in fragments
        }) != 1:
            raise ValueError('ref must be identical')

        self.fragments = fragments
    
        self.ref = fragments[0].ali.ref
        self.alis = [f.ali.ali for f in fragments]
        self.rrs = [f.ali.rrs for f in fragments]

    @classmethod
    def from_fragments(cls, *fragments: Fragment):
        return cls.from_alis(*[
            frag.ali
            for frag
            in fragments
        ])
            

    def get_feature(self, i):
        upstream = self.ref[:i]
        pipe_count = upstream.count('|')
        if pipe_count % 2 == 0:
            return 'noncoding'
        else:
            return 'coding'

    def consolidate(self) -> str:

        #Sort by i_fragment
        msa_sorted = MSA(*sorted(
            self.fragments,
            key = lambda x: x.fragment
        ))

        #Counts of each fragments
        fragment_counts = Counter([
            f.fragment
            for f
            in msa_sorted.fragments
        ])

        #Can only align if there is max 1 seq per fragment
        #Frags must also be contiguous
        i_fragment_prev = None
        for i_fragment, count in fragment_counts.items():
            if count > 1:
                raise ValueError(
                    'Cannot consolidate unless only 1 poss seq per fragment'
                )
            if (
                i_fragment_prev 
                and (i_fragment - i_fragment_prev) > 1
            ):
                raise ValueError(
                    'Fragments must be contiguous and overlapping'
                )
            i_fragment_prev = i_fragment
                    
        #Always choose fragment with higher read count

        #Sort fragment indexes, higher read count first
        prioritised_fragments: list[Fragment] = sorted(
            msa_sorted.fragments,
            key = lambda f: f.read_count,
            reverse=True
        )
        prioritised_alignments = [
            fragment.ali.ali
            for fragment
            in prioritised_fragments
        ]
        
        consolidated = []

        for col in range(len(prioritised_alignments[0])):
            base = '-'
            for i, aln in enumerate(prioritised_alignments):
                if aln[col] != '-':
                    base = aln[col]
                    prev_base_frag = i
                    break
            consolidated.append(base)

        return ''.join(consolidated)

    def print(self):
        ali_len = len(self.ref)
        for i in range(0, ali_len, 80):
            print(
                self.ref[i:i+80],
                *[
                    ali[i:i+80]
                    for ali
                    in self.alis
                ],
                sep='\n',
                end='\n\n'
            )




class AssembledSequence(str):
    def __new__(cls, *overlaps: Overlap):
        """
        Assemble all relevant information from list of frags
        """
        frags = [
            overlaps[0].frag_left, 
            *[
                overlap.frag_right
                for overlap
                in overlaps
            ]
        ]
    
        ali = MSA(*frags).consolidate()
        seq = clean_sequence(ali)
    
        obj = super().__new__(cls, seq)

        obj.overlaps = overlaps
        obj.frags = frags
        obj.mms = calculate_mms(seq, mode='gen')
        obj.exonic = get_erap_exonic(seq)
        obj.cds = get_erap_cds(seq)
        obj.cds_mms = calculate_mms(obj.exonic, mode='cds')
        obj.name = ''.join(
            frag.fragment_name
            for frag
            in sorted(
                frags,
                key = lambda x: x.fragment
            )
        )

        
        return obj

In [24]:
class FragmentPair(list):
    def __init__(
        self,
        frag1: Fragment,
        frag2: Fragment,
        fragment: int
    ):
        self.frag1 = frag1
        self.frag2 = frag2
        self.fragment = fragment

        super().__init__([frag1, frag2])

    def product(self, other: 'FragmentPair', enum=False):
        if not enumerate:
            for a, b in product(self, other):
                yield a, b 
        else:
            for (i, a), (j, b) in product(
                enumerate(self), enumerate(other)
            ):
                yield (i, a), (j, b)

class TilingRow(tuple):

    header = (
        'sample',
        'desc',
        'allele',
        'allele_name',
        'exon_count',
        'cds_mms',
        'gen_mms',
        'cds_homoz',
        'gen_homoz',
        '12CDS',
        '12RRA',
        '12GEN',
        '23CDS',
        '23RRA',
        '23GEN',
        '34CDS',
        '34RRA',
        '34GEN',
        '45CDS',
        '45RRA',
        '45GEN',
        'gen_seq',
        'cds_seq'
    )
    
    def __new__(
        cls,
        sample: str,
        desc: str,
        i_allele: int,
        allele_name: str = '',
        cds_mms: str = '',
        gen_mms: str = '',
        homozygous_cds: str = '',
        homozygous_gen: str = '',
        cds_seq: str = '',
        seq: str = '',
        overlaps: list[Overlap] = None
    ):
        overlap_dict = {}
        if overlaps:
            #Sorted overlaps for increasing frags
            overlaps = sorted(
                overlaps,
                key = lambda x: x.frag_left.fragment
            )
            for overlap in overlaps:
                label = f'{overlap.frag_left.fragment}{overlap.frag_right.fragment}'
                overlap_dict[f'{label}CDS'] = 'Y' if overlap.match_tuple[0] else 'N'
                overlap_dict[f'{label}RRA'] = 'Y' if overlap.match_tuple[2] else 'N'
                overlap_dict[f'{label}GEN'] = 'Y' if overlap.match_tuple[1] else 'N'

        exon_count = (
            ''
            if not seq
            else '20'
            if 'intron19:33433T>A'
            in gen_mms
            else '19'
        )
    
        
        obj = super().__new__(
            cls,
            [
                sample,
                desc,
                str(i_allele),
                allele_name,
                exon_count,
                cds_mms,
                gen_mms,
                homozygous_cds,
                homozygous_gen,
                overlap_dict.get('12CDS', ''),
                overlap_dict.get('12RRA', ''),
                overlap_dict.get('12GEN', ''),
                overlap_dict.get('23CDS', ''),
                overlap_dict.get('23RRA', ''),
                overlap_dict.get('23GEN', ''),
                overlap_dict.get('34CDS', ''),
                overlap_dict.get('34CDS', ''),
                overlap_dict.get('34RRA', ''),
                overlap_dict.get('45GEN', ''),
                overlap_dict.get('45RRA', ''),
                overlap_dict.get('45GEN', ''),
                cds_seq,
                seq
            ]
        )
        return obj
            
            

class FailedTiling:
    def __init__(
        self,
        sample: str,
        reason: str
    ):
        self.sample = sample
        self.reason = reason

    @property
    def rows(self):
        return [
            TilingRow(
                self.sample,
                self.reason,
                i+1,
            )
            for i 
            in range(2)
        ]
            

class Tiling:
    def __init__(
        self,
        sample: str,
        *fragment_pairs: FragmentPair
    ):
        self.sample = sample
        self.fragment_pairs = fragment_pairs
        self.assemblies = self.assemble()

    @classmethod
    def try_init(
        cls,
        sample: str,
        *args,
        **kwargs
    ):
        try:
            return cls(sample, *args, **kwargs)
        except (OverlapError, AssemblyError) as e:
            print(f'{type(e)}: {str(e)}', end='\n\n')
            return FailedTiling(sample, str(e)) 

    @property
    def rows(self):

        homozygous_cds = len({
            assembly.cds
            for assembly
            in self.assemblies
        }) == 1
        homozygous_gen = len({
            str(assembly)
            for assembly
            in self.assemblies
        }) == 1
        
        rows = [
            TilingRow(
                self.sample,
                '',
                i+1,
                assembly.name,
                assembly.cds_mms,
                assembly.mms,
                'Y' if homozygous_cds else 'N',
                'Y' if homozygous_gen else 'N',
                assembly.cds,
                str(assembly),
                assembly.overlaps
            )
            for i, assembly
            in enumerate(self.assemblies)
        ]
        return rows

    def assemble(self):

        possible_assemblies = PossibleAssemblies()
        
        for frags_left, frags_right in zip(
            self.fragment_pairs[:-1],
            self.fragment_pairs[1:],
        ):
            i_f_left: int = frags_left.fragment 
            i_f_right: int = frags_right.fragment 
            overlaps = {}
            for (i, frag_left), (j, frag_right) in frags_left.product(
                frags_right, 
                enum=True
            ):
                overlap = Overlap(frag_left, frag_right)
                #print(f'- A{i}/A{j}', overlap.match_tuple, bool(overlap))
                overlaps[(i,j)] = overlap

            acceptable_overlaps = {
                k:v
                for k, v
                in overlaps.items()
                if v
            }

            if len(acceptable_overlaps) == 0:
                raise OverlapError(
                    f'No overlaps found between F{i_f_left} and F{i_f_right}',
                    self.sample,
                    frags_left,
                    frags_right,
                    overlaps
                )
            if {_[0] for _ in acceptable_overlaps.keys()} != {0, 1}:
                raise OverlapError(
                    f'F{i_f_left} could not overlap both frags to F{i_f_right}',
                    self.sample,
                    frags_left,
                    frags_right,
                    overlaps
                )
            if {_[1] for _ in acceptable_overlaps.keys()} != {0, 1}:
                raise OverlapError(
                    f'F{i_f_left} could not overlap to both F{i_f_right} frags',
                    self.sample,
                    frags_left,
                    frags_right,
                    overlaps
                )

            #If there are three possible overlaps, one is always
            #removeable as one fragment has an unambiguous choicfe
            if len(acceptable_overlaps) == 3:
                f_left_counts = {0: [], 1:[]}
                for _i, _j in acceptable_overlaps.keys():
                    f_left_counts[_i].append(_j)
                #Find the fragment which has two options
                greedy_frag = 0 if len(f_left_counts[0]) == 2 else 1
                modest_frag = int(not greedy_frag)

                del acceptable_overlaps[(
                    greedy_frag, 
                    f_left_counts[modest_frag][0]
                )]
            #If there are 2 possible overlaps, perfect and unambig
            #If there are 4 ambigs, this is am ambiguous overlap.
            #This may or may not be a problem, depending on whether there
            #are polymorphisms both upstream and downstream that become
            #ambiguous due to the overlap ambiguity in this frag
            #Either way, here we add all possibilities
            

            possible_assemblies.update(list(acceptable_overlaps.values()))
            
            #print()
        #print(len(possible_assemblies), 'poss assemblies')

        overlaps1, overlaps2 = possible_assemblies.get_final_assemblies()
        
        #Consolidate final sequence from the frags 

        assembled1 = AssembledSequence(*overlaps1)
        assembled2 = AssembledSequence(*overlaps2)

        return assembled1, assembled2

            
class OverlapLinkedList:
    def __init__(
        self,
        head: Overlap,
        tail: 'OverlapLinkedList' = None
    ):
        self.head = head
        self.tail = tail

    def update(
        self,
        new_head: Overlap
    ) -> 'OverlapLinkedList':
        if self.head.frag_right is not new_head.frag_left:
            raise ValueError('New head doesnt match list')
        return OverlapLinkedList(
            new_head,
            self
        )

    def __iter__(self) -> list[Overlap]:
        '''
        return list of overlaps corresponding to the tree 
        '''
        
        frag_list = [self.head]
        
        if self.tail is not None:
            frag_list.extend(list(self.tail)[::-1])
            
        return iter(frag_list[::-1])

class PossibleAssemblies(list):
    def __init__(self):
        self.overlap_lls = []
        super().__init__(self.overlap_lls)

    def update(self, new_overlaps: list[Overlap]):

        #If no present linked lists, create them
        if not self.overlap_lls:
            for overlap in new_overlaps:
                self.overlap_lls.append(OverlapLinkedList(overlap))
            super().__init__(self.overlap_lls)
        #Else, assign them
        else:
            new_overlap_lls = []
            for ll, overlap in product(self.overlap_lls, new_overlaps): 
                if ll.head.frag_right is overlap.frag_left:
                    new_overlap_lls.append(ll.update(overlap))

            self.overlap_lls = new_overlap_lls 
            super().__init__(self.overlap_lls)

    def get_final_assemblies(self):

        unique_masked_sequences = {}
        
        for ll in self.overlap_lls:
            overlaps = list(ll)

            frags = [
                overlaps[0].frag_left, 
                *[
                    overlap.frag_right
                    for overlap
                    in overlaps
                ]
            ]
            unique_masked_sequences.setdefault(tuple(
                f.ali.seq_rr_masked
                for f
                in frags
            ), []).append(overlaps)

        #print(len(unique_masked_sequences), 'final assembly')
        #print('\n\n')

        if len(unique_masked_sequences) > 2:
            raise AssemblyError(
                'More than 2 possible sequences - unable to tile across frags'
            )
        elif len(unique_masked_sequences) == 1:
            possibilities = next(iter(unique_masked_sequences.values()))
            chosen_overlaps1, chosen_overlaps2 = self.select_two_alleles(
                possibilities,
                possibilities
            )
        else:
            possibilities = [
                ps
                for ps
                in unique_masked_sequences.values()
            ]
            chosen_overlaps1, chosen_overlaps2 = self.select_two_alleles(
                *possibilities
            )

        return chosen_overlaps1, chosen_overlaps2

    def select_two_alleles(
        self,
        poss1: list[list[Overlap]], 
        poss2: list[list[Overlap]]
    ) -> tuple[list[Overlap], list[Overlap]]:

        acceptable_pair_scores = {}
        #Find best combination of overlaps that only consumes each frag once
        for overlaps1, overlaps2 in product(poss1, poss2):
            #Skip if any overlapping fragments
            fragment_ids_1 = {
                id(frag)
                for overlap
                in overlaps1
                for frag
                in (overlap.frag_left, overlap.frag_right)
            }
            fragment_ids_2 = {
                id(frag)
                for overlap
                in overlaps2
                for frag
                in (overlap.frag_left, overlap.frag_right)
            }
            if fragment_ids_1 & fragment_ids_2:
                continue
            acceptable_pair_scores[(tuple(overlaps1), tuple(overlaps2))] = sum([
                overlap.match_tuple[3] #Sort on kmer distance
                for overlaps
                in (overlaps1, overlaps2)
                for overlap
                in overlaps
            ])

        #Get selected overlaos
        overlaps1, overlaps2 = sorted(
            acceptable_pair_scores.items(),
            key=lambda x: x[1],
            reverse=True
        )[0][0]

        return overlaps1, overlaps2

        frags1 = [
            overlaps1[0].frag_left, 
            *[
                overlap.frag_right
                for overlap
                in overlaps1
            ]
        ]
        frags2 = [
            overlaps2[0].frag_left, 
            *[
                overlap.frag_right
                for overlap
                in overlaps2
            ]
        ]
        #for frags in [frags1, frags2]:
        #    for frag in frags:
        #        print(id(frag), end='\t')
        #    print()
        #print(frags1)
        #print(frags2)
        #print('\n\n\n')
        
        


        

        return frags1, frags2


                    

            
        
        
        


In [25]:
directory = "out/fails"

# Creates directory and intermediate folders if they don't exist
os.makedirs(directory, exist_ok=True)

In [26]:
# Load into sample dict
samples = {}

for row in rows:
    sample = samples.setdefault(row['Samples'], {})

    sequence = row['Sequence'].strip()
    alignment = piped_read_alns[sequences2i[sequence]]
    i_fragment = row['Fragment']
    read_count = int(row['NumReads'])
    fragment_name = row['Fragment Name']
    gen_mms = [
        mm
        for mm
        in (row['gDNA_mismatch_list'].split(', ') if row['gDNA_mismatch_list'] else [])
        if mm != 'No_MM'
    ]

    fragment = Fragment(
        seq=sequence,
        ali=alignment,
        fragment=i_fragment,
        read_count=read_count,
        fragment_name=fragment_name,
        gen_mms=gen_mms
    )

    if row['analysis_code'] == 1:
        sample.setdefault(i_fragment, []).append(fragment)
    elif row['analysis_code'] == 2:
        sample.setdefault(i_fragment, []).extend([fragment, deepcopy(fragment)])
    elif row['analysis_code'] == None:
        continue
    else:
        continue
        raise Exception('Other code found:', row['Typing Results::analysis_code'])
        
    #print(len(sample.get(row['Typing Results::Fragment'], None)))
    
#Lose weird numbered samples
for sample_id, fragment_info in [*samples.items()]:
    for i in range(1, 6):
    #for fragment, sequences in fragment_info.items():
        if len(fragment_info.get(i, [])) != 2:
            print('Deleting', sample_id)
            print(sample_id.ljust(20), f'fragment {i}', f'{len(fragment_info.get(i, []))} sequences', sep='\t')
            del samples[sample_id]
            break

    
#Lose None sequence
for sample_id, fragment_info in [*samples.items()]:
    for fragment, sequences in fragment_info.items():
        if None in sequences:
            print('Deleting', sample_id)
            print(sample_id.ljust(20), f'fragment {fragment}', f'{len(sequences)} sequences', sep='\t')
            del samples[sample_id]
            break

In [27]:
sample_fragment_pairs = {}

for sample_id in samples:
    sample_fragment_pairs[sample_id] = []
    for i_fragment, fragments in samples[sample_id].items():
        fragment_pair = FragmentPair(*fragments, i_fragment)
        sample_fragment_pairs[sample_id].append(fragment_pair)
    sample_fragment_pairs[sample_id].sort(
        key = lambda x: x.fragment
    )

In [None]:
tuple([0, 1, 2])

In [None]:
#tilings = []
#for sample_id, fragment_pairs in sample_fragment_pairs.items():
#    tiling = Tiling.try_init(sample_id, *fragment_pairs)
#    tilings.append(tiling)
#    print(tiling.rows)
#    print('\n\n\n')

In [28]:
from tgsts.utils.parallel import run_concurrently

concurrent_results = run_concurrently(
    Tiling.try_init,
    [
        tuple([sample_id, *fragment_pairs])
        for sample_id, fragment_pairs
        in sample_fragment_pairs.items()
    ],
    mode = 'process',
    raise_exceptions = True,
    max_workers = 30,
)

Processing:   0%|                                                                                                                                                                                                              | 0/66 [00:00<?, ?it/s]

- A0/A0 - A0/A0(True, True, True, 0)  (True, True, True, 0)True 
True
- A0/A0 - A0/A0(False, False, False, -42)  (True, True, True, 0)False 
True
- A0/A0- A0/A0 - A0/A1 (False, False, False, -59) (True, False, False, -13) (True, True, True, 0)False - A0/A0
 
False- A0/A1 True
 (True, True, True, 0)(True, True, True, 0) - A0/A0 - A0/A1TrueTrue 
 
(True, False, True, -1)(True, False, True, -2)- A0/A1   TrueTrue
(True, True, True, 0)
 - A0/A1 True- A0/A1(True, False, True, -3)- A1/A0
  (True, False, False, -13) True(True, True, True, 0) 
- A1/A0- A0/A1 False 
True(True, True, True, 0)
 True
 - A0/A0(True, True, True, 0)(True, False, False, -53)- A0/A1- A1/A0   True(True, False, True, -1)
- A1/A0- A1/A0- A1/A0   - A0/A0- A1/A1  - A1/A0(True, True, True, 0)True  False- A0/A0(True, False, False, -13) (True, False, True, -1)  
(True, True, True, 0)
 (True, True, True, 0)(True, False, True, -1)  (True, True, True, 0)- A0/A0  True- A1/A1(True, True, True, 0)   TrueFalse
 True (True, True, True,

Processing:   2%|███                                                                                                                                                                                                   | 1/66 [00:00<00:23,  2.77it/s]

 (True, True, True, 0)
- A0/A0 
 (True, True, True, 0) 

True(False, False, False, -83)- A0/A0(False, False, False, -56)(True, True, True, 0)- A0/A1 TrueFalseFalse- A0/A1(True, True, True, 0) 
 True  TrueFalse- A0/A1
 - A0/A1 - A0/A0   (True, True, True, 0)- A0/A0


 - A0/A0True- A0/A0 (False, False, False, -465)
True(False, False, False, -74)
 
False  (True, False, True, -2)- A0/A0FalseTrue(True, False, True, -1)- A1/A0  - A1/A0- A0/A1(True, False, True, -1)
  - A0/A1 True

(True, True, True, 0) 
(True, False, True, -4) (True, False, True, -1) - A1/A0

 - A0/A1 - A0/A1True(True, True, True, 0)   (True, False, True, -4) (True, True, True, 0)False
- A0/A1 False- A0/A1True  (True, True, True, 0) True - A0/A1(True, True, True, 0) 
 - A0/A1(True, False, False, -52)True(True, True, True, 0)(True, True, True, 0) - A1/A0  
True

 TrueTrue - A0/A1(True, True, True, 0)
(True, True, True, 0)- A0/A1- A0/A0  (False, False, False, -72)True 
 - A1/A0 True  True(True, False, True, -1)(True, False, Tr

Processing:   3%|██████                                                                                                                                                                                                | 2/66 [00:00<00:26,  2.44it/s]

 (False, False, False, -73)
 (False, False, False, -72)True 
(True, True, True, 0) (True, True, True, 0) - A1/A1  FalseFalseTrue


True
(True, True, True, 0) 

(False, False, False, -55)
 - A1/A0(False, False, False, -410) TrueFalse - A1/A1(False, False, False, -461) 
- A1/A1



 False- A1/A1
 - A1/A1True  True

(True, True, True, 0)  False- A1/A1- A0/A0- A0/A0 
True
 False 
- A0/A1(False, False, False, -408)False

- A1/A1
- A0/A0(False, False, False, -465) False
   (False, False, False, -82)- A1/A1

(True, True, True, 0)
(True, True, True, 0)
- A1/A1  
   True- A1/A1

(True, True, True, 0)- A0/A0(True, True, True, 0)(True, False, False, -28)  
 
  (True, True, True, 0)False- A1/A1- A0/A0(True, True, True, 0)(True, True, True, 0)- A0/A0False
  - A0/A0   (True, False, True, -4)False- A0/A0True(True, False, True, -1)- A0/A0 True
     

(True, True, True, 0)True True(True, True, True, 0)- A0/A1False 
 
  True
(True, False, True, -2)(True, False, False, -14)TrueTrue- A0/A0(True, True, True

Processing:   5%|█████████                                                                                                                                                                                             | 3/66 [00:01<00:31,  2.01it/s]

(True, True, True, 0)  - A0/A0- A0/A0 (True, False, False, -37)
 - A0/A0 (True, True, True, 0)
- A0/A0(True, True, True, 0)

 (True, True, True, 0) (True, True, True, 0)True (True, False, True, -1) True  True False - A0/A0  True- A0/A1 (True, True, True, 0) True
- A0/A0 - A0/A1(True, True, True, 0)
(True, True, True, 0)False
- A0/A1(True, True, True, 0)- A0/A0
True (False, False, False, -83)- A0/A0True
 - A0/A0TrueTrue - A0/A0

 True  - A1/A0
 
 
  - A0/A1- A0/A0- A0/A1
(True, True, True, 0)  

(True, True, True, 0) 

True (True, True, True, 0)
(True, False, True, -4)True True(True, True, True, 0)True(True, True, True, 0)    False(True, True, True, 0)
<class '__main__.OverlapError'>: F2 could not overlap to both F3 frags (True, True, True, 0)- A1/A1- A0/A1
(True, True, True, 0)
  
(True, True, True, 0)
 
 (True, False, True, -1)(False, False, False, -70)(False, False, False, -252)True- A0/A0
- A0/A0 - A0/A1

True    
- A0/A1True- A0/A0True True- A0/A0

Processing:   6%|████████████                                                                                                                                                                                          | 4/66 [00:01<00:21,  2.89it/s]

TrueTrue  - A0/A1
   
True (True, False, True, -1)(True, True, True, 0)True 
 
True 

- A0/A1True- A0/A0False False- A0/A0(True, True, True, 0)(True, True, True, 0)- A0/A1- A0/A1
(False, False, False, -213)
- A0/A1- A0/A0  
(True, False, False, -40)(True, True, True, 0)
(True, True, True, 0) 
 
(False, False, False, -239)
- A0/A1        TrueTrue   (True, True, True, 0)- A1/A0(True, False, False, -13)- A1/A0- A0/A0  - A1/A0(True, False, True, -2)- A0/A1TrueTrue(True, True, True, 0)(True, True, True, 0)False- A0/A1(True, True, True, 0)
(True, True, True, 0)
FalseTrueTrue  - A0/A1   False- A0/A1- A1/A1(True, False, False, -151)   - A0/A1

  
   

- A0/A1

True- A1/A0(True, True, True, 0)- A1/A0 False(True, True, True, 0)(True, False, False, -60)
   (True, False, True, -1)True (True, True, True, 0)TrueTrueTrue(True, True, True, 0)True- A1/A0 
   (False, False, False, -84) 
 (True, True, True, 0)(True, True, True, 0)False 
(True, False, False, -147) 


- A0/A1  
(True, True, True, 0)(False,

Processing:   8%|███████████████                                                                                                                                                                                       | 5/66 [00:01<00:17,  3.41it/s]

  TrueFalse   - A1/A0 
 (True, False, False, -45)
(True, True, True, 0)
  - A1/A0
- A1/A1 
(True, True, True, 0)- A1/A1  

True(True, True, True, 0)False False(True, True, True, 0)
 (True, False, True, -1)False False- A1/A1True  True- A1/A0  (True, False, False, -147)(False, False, False, -84)
 
(True, True, True, 0)
 - A1/A1False - A0/A0
True
 
(True, False, True, -1)(True, False, True, -1)
 True(False, False, False, -239)  True- A1/A1 - A1/A0 True
True - A1/A1
- A0/A1(True, True, True, 0)- A1/A1- A1/A0  - A1/A0(False, False, False, -213)
False False
 True (True, True, True, 0)

- A1/A1
(False, False, False, -70) 
   - A1/A0 True- A1/A0True - A1/A1 
False
- A1/A0(True, True, True, 0)

(True, True, True, 0)16   - A1/A1(True, True, True, 0)- A1/A1(True, False, False, -170)True(True, True, True, 0) (True, False, False, -40) 

(True, True, True, 0)False 
   (False, False, False, -83) TrueFalse  - A1/A1  
 (False, False, False, -72) (True, False, False, -12)- A1/A1
 
(True, True, True, 0)


Processing:   9%|██████████████████                                                                                                                                                                                    | 6/66 [00:02<00:17,  3.44it/s]

- A0/A0
True(True, True, True, 0)
True




    
False
 
(True, False, False, -59) - A1/A0- A0/A0
- A0/A1 
32

- A0/A1- A0/A1
(True, True, True, 0)
 (True, True, True, 0)- A0/A0(True, True, True, 0)final assemblyFalse


(True, True, True, 0)   2 TrueFalse - A0/A1   (True, True, True, 0) - A0/A0 
 
 (True, False, True, -1)(True, True, True, 0)False (True, True, True, 0)- A0/A1
poss assemblies
 (True, False, False, -121)(True, True, True, 0)True- A1/A1<class '__main__.OverlapError'>: No overlaps found between F4 and F5 - A0/A1True (True, False, True, -2)

True True- A0/A0 final assembly
  
- A0/A1- A0/A0(True, True, True, 0) - A0/A0 
 

True
 (False, False, False, -102) 
True
 True


True- A0/A01(True, True, True, 0)(True, False, False, -75)
- A0/A1   False - A1/A0 True(True, True, True, 0)(True, False, False, -34)True




 - A0/A1   (True, True, True, 0)True(True, True, True, 0)
(True, False, False, -14) False
  
 final assembly
 (True, True, True, 0)(True, True, True, 0)True- A0/A1 
  (

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)




- A1/A1 (True, True, True, 0) True- A1/A1
 
(True, False, False, -14)4  Falseposs assemblies


416  final assemblyposs assemblies



2
 <class '__main__.AssemblyError'>: More than 2 possible sequences - unable to tile across frags

final assembly





Processing:  17%|████████████████████████████████▊                                                                                                                                                                    | 11/66 [00:03<00:18,  3.05it/s]

- A0/A0 (False, False, False, -56) False
- A0/A1 (True, False, True, -1) True
- A1/A0 (True, False, True, -1) True
- A1/A1 (False, False, False, -58) False

- A0/A0 (True, True, True, 0) True
- A0/A1 (False, False, False, -465) False
- A1/A0 (False, False, False, -465) False
- A1/A1 (True, True, True, 0) True

- A0/A0 (True, True, True, 0) True
- A0/A1 (False, False, False, -266) False
- A1/A0 (False, False, False, -266) False
- A1/A1 (True, True, True, 0) True

- A0/A0 (True, False, False, -118) False
- A0/A1 (True, True, True, 0) True
- A1/A0 (True, True, True, 0) True
- A1/A1 (True, False, False, -118) False

2 poss assemblies
2 final assembly





Processing:  17%|████████████████████████████████▊                                                                                                                                                                    | 11/66 [00:45<03:46,  4.12s/it]

- A0/A0 (False, False, False, -81) False
- A0/A1 (True, True, True, 0) True
- A0/A0 (True, False, True, -1) True
- A1/A0 - A0/A0(True, False, True, -2)  (True, True, True, 0)True
 True
- A1/A1 (False, False, False, -81) False

- A0/A1 (False, False, False, -71) False- A0/A1
 - A0/A0 (False, False, False, -87)(True, True, True, 0)  




False- A0/A0- A0/A0  (True, False, True, -1)(False, False, False, -88)  TrueFalse

- A0/A1 (False, False, False, -90) False
- A0/A0- A0/A1  (True, True, True, 0)(True, False, True, -3)  TrueTrue- A1/A0

 (False, False, False, -89) False
- A0/A0 (False, False, False, -88) False
- A0/A1 (True, True, True, 0) True 
- A0/A0 (False, False, False, -87)- A0/A0  False(False, False, False, -80)



KeyboardInterrupt



- A1/A0  False(True, True, True, 0) 
True- A0/A0
(False, False, False, -74) False
- A1/A1 (False, False, False, -88) - A0/A1False 
(True, False, True, -1)
 True
- A0/A1 (True, True, True, 0) - A0/A1True 
(True, True, True, 0) True
- A0/A0- A0/A0  (True, True, True, 0)(True, True, True, 0)  TrueTrue

- A1/A0 (True, True, True, 0) True- A1/A0
- A0/A1- A1/A0   (True, False, True, -1)- A0/A1(True, True, True, 0)(True, True, True, 0)    (False, False, False, -158)TrueTrueTrue 


False
- A1/A1 (False, False, False, -73) False

- A1/A1 (False, False, False, -88) False

- A0/A0 - A1/A0- A1/A1(True, True, True, 0)   (False, False, False, -80)(True, True, True, 0)True- A1/A0 
  TrueFalse(False, False, False, -158)

 
False
- A0/A0 (True, True, True, 0)- A0/A1  True(False, False, False, -345) 
False
- A1/A1 (True, True, True, 0) - A1/A0True - A1/A1(False, False, False, -345)
- A0/A0- A0/A1  
  False(True, True, True, 0)(False, False, False, -396)(False, False, False, -172)
   TrueFalseFalse



- 

Process ForkProcess-87:
Traceback (most recent call last):
  File "/usr/lib/python3.13/concurrent/futures/process.py", line 254, in _process_worker
    r = call_item.fn(*call_item.args, **call_item.kwargs)


- A0/A0

  File "/mnt/ext4/home/richardn/TGS/TgsToolset/src/tgsts/utils/parallel.py", line 322, in _run_and_catch
    result = func(*args, **kwargs)


 

  File "/tmp/ipykernel_1195282/3833288232.py", line 162, in try_init
    return cls(sample, *args, **kwargs)


(True, False, True, -1) 

  File "/tmp/ipykernel_1195282/3833288232.py", line 152, in __init__
    self.assemblies = self.assemble()
                      ~~~~~~~~~~~~~^^


True

  File "/tmp/ipykernel_1195282/3833288232.py", line 282, in assemble
    assembled1 = AssembledSequence(*overlaps1)





  File "/tmp/ipykernel_1195282/558165607.py", line 136, in __new__
    obj.cds = get_erap_cds(seq)
              ~~~~~~~~~~~~^^^^^


- A0/A0- A0/A0

  File "/tmp/ipykernel_1195282/1445845419.py", line 39, in get_erap_cds
    res = annotator('ace78ee52a044de361c280b47d9e8', gen_str, fast_alignment=True)


  (True, False, True, -1)

  File "/mnt/ext4/home/richardn/TGS/py_ipd_sfat/src/sfat/annotation/annotator.py", line 66, in __call__
    aligned_query, aligned_reference = aligner.align(
                                       ~~~~~~~~~~~~~^
        query,
        ^^^^^^
        reference['sequence']
        ^^^^^^^^^^^^^^^^^^^^^
    )
    ^


(False, False, False, -56) 

  File "/mnt/ext4/home/richardn/TGS/py_ipd_sfat/src/sfat/align/align.py", line 64, in align
    aligned_query, aligned_reference = self.aligner.align(
                                       ~~~~~~~~~~~~~~~~~~^
        query,
        ^^^^^^
        raw_reference
        ^^^^^^^^^^^^^
    )
    ^


 True

  File "/mnt/ext4/home/richardn/TGS/py_ipd_sfat/src/sfat/align/align.py", line 209, in align
    result = self.parasail.sg_trace(
        query,
    ...<3 lines>...
        self.matrix
    )


False



  File "/mnt/ext4/home/richardn/TGS/tgsenv/lib/python3.13/site-packages/parasail/bindings_v2.py", line 1134, in sg_trace
    return Result(_lib.parasail_sg_trace(
                  ~~~~~~~~~~~~~~~~~~~~~~^
        b(s1), len(s1), b(s2), len(s2), open, extend, matrix),
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):


- A0/A1

  File "/usr/lib/python3.13/multiprocessing/process.py", line 313, in _bootstrap
    self.run()
    ~~~~~~~~^^


 

  File "/usr/lib/python3.13/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


(False, False, False, -73)

  File "/usr/lib/python3.13/concurrent/futures/process.py", line 256, in _process_worker
    exc = _ExceptionWithTraceback(e, e.__traceback__)


 

  File "/usr/lib/python3.13/concurrent/futures/process.py", line 137, in __init__
    tb = ''.join(format_exception(type(exc), exc, tb))
                 ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^


False

  File "/usr/lib/python3.13/traceback.py", line 154, in format_exception
    te = TracebackException(type(value), value, tb, limit=limit, compact=True)





  File "/usr/lib/python3.13/traceback.py", line 1052, in __init__
    self.stack = StackSummary._extract_from_extended_frame_gen(
                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
        _walk_tb_with_full_positions(exc_traceback),
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        limit=limit, lookup_lines=lookup_lines,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        capture_locals=capture_locals)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


- A0/A1

  File "/usr/lib/python3.13/traceback.py", line 500, in _extract_from_extended_frame_gen
    f.line


 

  File "/usr/lib/python3.13/traceback.py", line 373, in line
    self._set_lines()
    ~~~~~~~~~~~~~~~^^


(True, True, True, 0)

  File "/usr/lib/python3.13/traceback.py", line 351, in _set_lines
    line = linecache.getline(self.filename, lineno).rstrip()
           ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^


 

  File "/usr/lib/python3.13/linecache.py", line 26, in getline
    lines = getlines(filename, module_globals)


- A0/A1True

  File "/usr/lib/python3.13/linecache.py", line 42, in getlines
    return updatecache(filename, module_globals)


 


  File "/usr/lib/python3.13/linecache.py", line 172, in updatecache
    lines = fp.readlines()


(False, False, False, -101)

  File "/usr/lib/python3.13/codecs.py", line 322, in decode
    def decode(self, input, final=False):
    


 

KeyboardInterrupt


False
- A1/A0 (False, False, False, -75) False
- A1/A0 (True, False, True, -2)- A1/A0  True(False, False, False, -114)
 False
- A1/A1 (True, False, True, -1) True

- A1/A1 (False, False, False, -58)- A1/A1  False(True, False, False, -12)
 False

<class '__main__.OverlapError'>: F1 could not overlap both frags to F2

- A0/A0 (True, True, True, 0) True
- A0/A0 (True, False, True, -6) True


Process ForkProcess-89:
Process ForkProcess-68:
Process ForkProcess-74:
Process ForkProcess-85:
Process ForkProcess-93:
Process ForkProcess-82:
Process ForkProcess-80:
Process ForkProcess-86:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.13/multiprocessing/process.py", line 313, in _bootstrap
    self.run()
    ~~~~~~~~^^
  File "/usr/lib/python3.13/multiprocessing/process.py", line 313, in _bootstrap
    self.run()
    ~~~~~~~~^^
  File "/usr/lib/python3.13/multiprocessing/process.py", line 313, in _bootstrap
    self.run()
    ~~~~~~~~^^
  File "/usr/lib/python3.13/multiprocessing/process.py", line 313, in _bootstrap
    self.run()
    ~~~~~~~~^^
  File "/usr/lib/python3.13/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)

In [112]:
output_path = 'assembled.csv'

import csv

with open(output_path, 'w', newline='\n') as csvfile:
    writer = csv.writer(
        csvfile, 
        delimiter=',',
        quotechar='"', 
        quoting=csv.QUOTE_MINIMAL
    )
    writer.writerow(TilingRow.header)
    for tiling in tilings:
        for row in tiling.rows:
            writer.writerow(row)

A
