In [23]:
import os
import gzip
import itertools
from toolz import partition, sliding_window
from toolz import itertoolz
from collections import Counter, defaultdict

In [2]:
# Standard genetic code keyed by RNA codon (one-letter amino-acid codes).
# The 64 triplets are enumerated over the bases U, C, A, G with the first base
# varying slowest and paired position-by-position with the amino-acid string
# below; '*' marks the stop codons UAA, UAG and UGA. Sorting the pairs keeps
# the keys in alphabetical order.
codon_table_std_RNA = dict(sorted(zip(
    (''.join(triplet) for triplet in itertools.product('UCAG', repeat=3)),
    'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
)))


# Standard genetic code keyed by DNA codon (one-letter amino-acid codes).
# The 64 triplets are enumerated over the bases T, C, A, G with the first base
# varying slowest and paired position-by-position with the amino-acid string
# below; '*' marks the stop codons TAA, TAG and TGA. Sorting the pairs keeps
# the keys in alphabetical order.
codon_table_std_DNA = dict(sorted(zip(
    (''.join(triplet) for triplet in itertools.product('TCAG', repeat=3)),
    'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
)))


In [3]:
def codons(sequence, mol_type='RNA'):
    """Return a generator over the non-overlapping codons of a sequence.

    The sequence is upper-cased and cut into consecutive triplets starting at
    position 0. Trailing bases that do not fill a complete codon are dropped.

    Args:
        sequence: DNA/RNA sequence as a string.
        mol_type: 'RNA' (default) or 'DNA', matched case-insensitively. For
            'RNA' every 'T' is replaced by 'U' before splitting; for 'DNA'
            the bases are left untouched.

    Returns:
        A generator of 3-character strings.

    Raises:
        ValueError: if mol_type is neither 'RNA' nor 'DNA'. Previously the
            function silently returned None in that case, which only failed
            later when the caller tried to iterate over the result.
    """
    kind = str(mol_type).upper()
    if kind not in ('RNA', 'DNA'):
        raise ValueError(
            "mol_type must be 'RNA' or 'DNA', got {!r}".format(mol_type))

    seq = sequence.upper()
    if kind == 'RNA':
        seq = seq.replace('T', 'U')

    # Only whole codons are produced: ignore the 1-2 leftover bases at the end.
    usable = len(seq) - len(seq) % 3
    return (seq[start:start + 3] for start in range(0, usable, 3))

In [4]:
# Short DNA example (three complete codons) reused by the demo cells below.
dna = 'ATGCTTTTA'
list(codons(dna, 'DNA'))

['ATG', 'CTT', 'TTA']

In [5]:
def kmers(sequence, k):
    """Return a generator over every overlapping substring of length k.

    A window of width k is slid one position at a time along the sequence and
    the items of each window are joined back into a single string.
    """
    windows = sliding_window(k, sequence)
    return (''.join(window) for window in windows)

In [6]:
list(kmers(dna, 5))

['ATGCT', 'TGCTT', 'GCTTT', 'CTTTT', 'TTTTA']

In [7]:
itertoolz.frequencies(dna)

{'A': 2, 'T': 5, 'G': 1, 'C': 1}

In [8]:
def count_substrings(sequence, k):
    """Count how often each overlapping substring of length k occurs.

    Args:
        sequence: the string to scan.
        k: substring (k-mer) length.

    Returns:
        A dict mapping each k-mer produced by kmers(sequence, k) to its number
        of occurrences.
    """
    return itertoolz.frequencies(kmers(sequence, k))


# Backward-compatible alias: the function was first defined under this
# misspelled name and existing cells still call it that way.
count_subsatrings = count_substrings

In [9]:
count_subsatrings(dna, 3)

{'ATG': 1, 'TGC': 1, 'GCT': 1, 'CTT': 1, 'TTT': 2, 'TTA': 1}

In [10]:
dna

'ATGCTTTTA'

In [95]:
def get_translation(sequence, mol_type):
    """Translate a coding sequence into a string of one-letter amino acids.

    The sequence is split into codons with codons() and every codon is looked
    up in the standard table for the requested molecule type. Codons missing
    from the table (ambiguous bases, wrong alphabet) are rendered as '?'.
    Stop codons are left out of the result; they do not end the translation.

    Args:
        sequence: DNA/RNA coding sequence as a string.
        mol_type: 'RNA' or 'DNA', matched case-insensitively.

    Returns:
        The translated protein as a string.

    Raises:
        ValueError: if mol_type is neither 'RNA' nor 'DNA'. Previously any
            other value selected the DNA table and then failed with a
            TypeError, because codons() returned None for it.
    """
    kind = str(mol_type).upper()
    if kind == 'RNA':
        codon_map = codon_table_std_RNA
    elif kind == 'DNA':
        codon_map = codon_table_std_DNA
    else:
        raise ValueError(
            "mol_type must be 'RNA' or 'DNA', got {!r}".format(mol_type))

    protein = []
    for codon in codons(sequence, kind):
        residue = codon_map.get(codon, '?')
        # The tables mark stop codons with '*'; those are not emitted.
        if residue != '*':
            protein.append(residue)
    return ''.join(protein)

In [97]:
get_translation(dna, 'DNA')

'MLL'

In [86]:
seq = "ATGACCATGATTACGGATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCT\
GGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGC\
GAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGC\
TTTGCCTGGTTTCCGGCACCAGAAGCGGTGCCGGAAAGCTGGCTGGAGTGCGATCTTCCT\
GAGGCCGATACTGTCGTCGTCCCCTCAAACTGGCAGATGCACGGTTACGATGCGCCCATC\
TACACCAACGTGACCTATCCCATTACGGTCAATCCGCCGTTTGTTCCCACGGAGAATCCG\
ACGGGTTGTTACTCGCTCACATTTAATGTTGATGAAAGCTGGCTACAGGAAGGCCAGACG\
CGAATTATTTTTGATGGCGTTAACTCGGCGTTTCATCTGTGGTGCAACGGGCGCTGGGTC\
GGTTACGGCCAGGACAGTCGTTTGCCGTCTGAATTTGACCTGAGCGCATTTTTACGCGCC\
GGAGAAAACCGCCTCGCGGTGATGGTGCTGCGCTGGAGTGACGGCAGTTATCTGGAAGAT\
CAGGATATGTGGCGGATGAGCGGCATTTTCCGTGACGTCTCGTTGCTGCATAAACCGACT\
ACACAAATCAGCGATTTCCATGTTGCCACTCGCTTTAATGATGATTTCAGCCGCGCTGTA\
CTGGAGGCTGAAGTTCAGATGTGCGGCGAGTTGCGTGACTACCTACGGGTAACAGTTTCT\
TTATGGCAGGGTGAAACGCAGGTCGCCAGCGGCACCGCGCCTTTCGGCGGTGAAATTATC\
GATGAGCGTGGTGGTTATGCCGATCGCGTCACACTACGTCTGAACGTCGAAAACCCGAAA\
CTGTGGAGCGCCGAAATCCCGAATCTCTATCGTGCGGTGGTTGAACTGCACACCGCCGAC\
GGCACGCTGATTGAAGCAGAAGCCTGCGATGTCGGTTTCCGCGAGGTGCGGATTGAAAAT\
GGTCTGCTGCTGCTGAACGGCAAGCCGTTGCTGATTCGAGGCGTTAACCGTCACGAGCAT\
CATCCTCTGCATGGTCAGGTCATGGATGAGCAGACGATGGTGCAGGATATCCTGCTGATG\
AAGCAGAACAACTTTAACGCCGTGCGCTGTTCGCATTATCCGAACCATCCGCTGTGGTAC\
ACGCTGTGCGACCGCTACGGCCTGTATGTGGTGGATGAAGCCAATATTGAAACCCACGGC\
ATGGTGCCAATGAATCGTCTGACCGATGATCCGCGCTGGCTACCGGCGATGAGCGAACGC\
GTAACGCGAATGGTGCAGCGCGATCGTAATCACCCGAGTGTGATCATCTGGTCGCTGGGG\
AATGAATCAGGCCACGGCGCTAATCACGACGCGCTGTATCGCTGGATCAAATCTGTCGAT\
CCTTCCCGCCCGGTGCAGTATGAAGGCGGCGGAGCCGACACCACGGCCACCGATATTATT\
TGCCCGATGTACGCGCGCGTGGATGAAGACCAGCCCTTCCCGGCTGTGCCGAAATGGTCC\
ATCAAAAAATGGCTTTCGCTACCTGGAGAGACGCGCCCGCTGATCCTTTGCGAATACGCC\
CACGCGATGGGTAACAGTCTTGGCGGTTTCGCTAAATACTGGCAGGCGTTTCGTCAGTAT\
CCCCGTTTACAGGGCGGCTTCGTCTGGGACTGGGTGGATCAGTCGCTGATTAAATATGAT\
GAAAACGGCAACCCGTGGTCGGCTTACGGCGGTGATTTTGGCGATACGCCGAACGATCGC\
CAGTTCTGTATGAACGGTCTGGTCTTTGCCGACCGCACGCCGCATCCAGCGCTGACGGAA\
GCAAAACACCAGCAGCAGTTTTTCCAGTTCCGTTTATCCGGGCAAACCATCGAAGTGACC\
AGCGAATACCTGTTCCGTCATAGCGATAACGAGCTCCTGCACTGGATGGTGGCGCTGGAT\
GGTAAGCCGCTGGCAAGCGGTGAAGTGCCTCTGGATGTCGCTCCACAAGGTAAACAGTTG\
ATTGAACTGCCTGAACTACCGCAGCCGGAGAGCGCCGGGCAACTCTGGCTCACAGTACGC\
GTAGTGCAACCGAACGCGACCGCATGGTCAGAAGCCGGGCACATCAGCGCCTGGCAGCAG\
TGGCGTCTGGCGGAAAACCTCAGTGTGACGCTCCCCGCCGCGTCCCACGCCATCCCGCAT\
CTGACCACCAGCGAAATGGATTTTTGCATCGAGCTGGGTAATAAGCGTTGGCAATTTAAC\
CGCCAGTCAGGCTTTCTTTCACAGATGTGGATTGGCGATAAAAAACAACTGCTGACGCCG\
CTGCGCGATCAGTTCACCCGTGCACCGCTGGATAACGACATTGGCGTAAGTGAAGCGACC\
CGCATTGACCCTAACGCCTGGGTCGAACGCTGGAAGGCGGCGGGCCATTACCAGGCCGAA\
GCAGCGTTGTTGCAGTGCACGGCAGATACACTTGCTGATGCGGTGCTGATTACGACCGCT\
CACGCGTGGCAGCATCAGGGGAAAACCTTATTTATCAGCCGGAAAACCTACCGGATTGAT\
GGTAGTGGTCAAATGGCGATTACCGTTGATGTTGAAGTGGCGAGCGATACACCGCATCCG\
GCGCGGATTGGCCTGAACTGCCAGCTGGCGCAGGTAGCAGAGCGGGTAAACTGGCTCGGA\
TTAGGGCCGCAAGAAAACTATCCCGACCGCCTTACTGCCGCCTGTTTTGACCGCTGGGAT\
CTGCCATTGTCAGACATGTATACCCCGTACGTCTTCCCGAGCGAAAACGGTCTGCGCTGC\
GGGACGCGCGAATTGAATTATGGCCCACACCAGTGGCGCGGCGACTTCCAGTTCAACATC\
AGCCGCTACAGTCAACAGCAACTGATGGAAACCAGCCATCGCCATCTGCTGCACGCGGAA\
GAAGGCACATGGCTGAATATCGACGGTTTCCATATGGGGATTGGTGGCGACGACTCCTGG\
AGCCCGTCAGTATCGGCGGAATTCCAGCTGAGCGCCGGTCGCTACCATTACCAGTTGGTC\
TGGTGTCAAAAATAA"

In [98]:
get_translation(seq, 'DNA')

'MTMITDSLAVVLQRRDWENPGVTQLNRLAAHPPFASWRNSEEARTDRPSQQLRSLNGEWRFAWFPAPEAVPESWLECDLPEADTVVVPSNWQMHGYDAPIYTNVTYPITVNPPFVPTENPTGCYSLTFNVDESWLQEGQTRIIFDGVNSAFHLWCNGRWVGYGQDSRLPSEFDLSAFLRAGENRLAVMVLRWSDGSYLEDQDMWRMSGIFRDVSLLHKPTTQISDFHVATRFNDDFSRAVLEAEVQMCGELRDYLRVTVSLWQGETQVASGTAPFGGEIIDERGGYADRVTLRLNVENPKLWSAEIPNLYRAVVELHTADGTLIEAEACDVGFREVRIENGLLLLNGKPLLIRGVNRHEHHPLHGQVMDEQTMVQDILLMKQNNFNAVRCSHYPNHPLWYTLCDRYGLYVVDEANIETHGMVPMNRLTDDPRWLPAMSERVTRMVQRDRNHPSVIIWSLGNESGHGANHDALYRWIKSVDPSRPVQYEGGGADTTATDIICPMYARVDEDQPFPAVPKWSIKKWLSLPGETRPLILCEYAHAMGNSLGGFAKYWQAFRQYPRLQGGFVWDWVDQSLIKYDENGNPWSAYGGDFGDTPNDRQFCMNGLVFADRTPHPALTEAKHQQQFFQFRLSGQTIEVTSEYLFRHSDNELLHWMVALDGKPLASGEVPLDVAPQGKQLIELPELPQPESAGQLWLTVRVVQPNATAWSEAGHISAWQQWRLAENLSVTLPAASHAIPHLTTSEMDFCIELGNKRWQFNRQSGFLSQMWIGDKKQLLTPLRDQFTRAPLDNDIGVSEATRIDPNAWVERWKAAGHYQAEAALLQCTADTLADAVLITTAHAWQHQGKTLFISRKTYRIDGSGQMAITVDVEVASDTPHPARIGLNCQLAQVAERVNWLGLGPQENYPDRLTAACFDRWDLPLSDMYTPYVFPSENGLRCGTRELNYGPHQWRGDFQFNISRYSQQQLMETSHRHLLHAEEGTWLNIDGFHMGIGGDDS

In [91]:
def is_header(line):
    """Return True when a FASTA line is a record header (starts with '>').

    startswith is used so that an empty string yields False instead of
    raising IndexError.
    """
    return line.startswith('>')


def parse_multi_fasta_file_compressed_or_not(filename):
    """Yield (name, sequence) for each record of a plain or gzipped FASTA file.

    Files whose name ends in '.gz' are opened with gzip, all others with the
    built-in open; both are read as text.

    Args:
        filename: path to the FASTA file.

    Yields:
        Tuples (name, sequence). name is the header line without the leading
        '>' and surrounding whitespace; sequence is the concatenation of the
        stripped lines that follow the header up to the next header. A header
        that is not followed by any sequence line yields an empty sequence.
        Lines that appear before the first header are ignored.
    """
    opener = gzip.open if filename.endswith('.gz') else open

    with opener(filename, 'rt') as handle:
        pending = None  # header seen but whose sequence has not been read yet
        for header_run, lines in itertools.groupby(handle, is_header):
            if header_run:
                # groupby puts consecutive header lines in one run; every
                # header but the last one has no sequence of its own.
                for line in lines:
                    if pending is not None:
                        yield pending, ''
                    pending = line[1:].strip()
            elif pending is not None:
                yield pending, ''.join(line.strip() for line in lines)
                pending = None
        # The file ended on a header: emit it instead of failing.
        if pending is not None:
            yield pending, ''


def fasta_item_counter(filename):
    """Count the records in a gzip-compressed FASTA file.

    The file is always opened with gzip in text mode, regardless of its
    extension. The lines are grouped by is_header and every run of header
    lines is counted once, so directly consecutive headers count as a single
    item.

    Args:
        filename: path to the gzip-compressed FASTA file.

    Returns:
        The number of header runs found, 0 for an empty file.
    """
    # The context manager closes the handle, which the original left open.
    with gzip.open(filename, 'rt') as handle:
        runs = itertools.groupby(handle, key=is_header)
        return sum(1 for header_run, _ in runs if header_run)


def count_fasta_files(filename):
    """Count the records in a plain or gzip-compressed FASTA file.

    Despite its name the function counts records inside one file, not files.
    A name ending in '.gz' is read through gzip, anything else through the
    built-in open. Lines are grouped by is_header and each run of header lines
    contributes 1 to the total.
    """
    opener = gzip.open if filename.endswith('.gz') else open

    with opener(filename, 'rt') as handle:
        # Group keys are booleans: True for a header run, False otherwise.
        header_flags = (flag for flag, _ in itertools.groupby(handle, key=is_header))
        return sum(header_flags)


def str_punctuation_strip(word):
    """Split a string into tokens after blanking out punctuation.

    Every character listed in the punctuation set below is replaced by a
    space and the result is split on whitespace. '.' and '_' are not in the
    set, so tokens such as 'NC_000117.1' stay in one piece.

    Args:
        word: the string to tokenize.

    Returns:
        A list of tokens; empty when the input holds only punctuation and
        whitespace.
    """
    punctuation = '!"#$%&\'()*+,-/:;<=>?@[\\]^`{|}~'
    # One translation pass replaces the previous nested loops, which repeated
    # the same replacements once per character of the input.
    to_spaces = str.maketrans(punctuation, ' ' * len(punctuation))
    return word.translate(to_spaces).split()



In [92]:
# Root directory that is searched for input files
dir_name = '/home/paulo/Documents/uniprot/genomes/refseq'


# Walk the whole tree and keep the full path of every file ending in '.gz'
infiles = []
for path, subdirs, files in os.walk(dir_name):
    infiles.extend(os.path.join(path, name) for name in files if name.endswith('.gz'))


In [94]:
# Containers for the results collected while processing the FASTA files.
# mers_count gives every new key an empty Counter; mers_lst and prot_lst give
# every new key an empty list. What the keys are is not fixed here —
# presumably one entry per genome or record; TODO confirm against the loop
# that fills them.
mers_count = defaultdict(Counter)
mers_lst = defaultdict(list)
prot_lst = defaultdict(list)
# Counter for processed files, starting at zero.
cnt_files = 0

In [34]:
!pwd

/home/paulo/Documents/uniprot/codes


In [100]:
# Translate every CDS record of one test genome, print each protein and keep
# the translations grouped by record key.
filename = '/home/paulo/Documents/uniprot/genomes/refseq/test/GCF_000006685.1/GCF_000006685.1_ASM668v1_cds_from_genomic.fna.gz'
prot_lst = defaultdict(list)
for name, sequence in parse_multi_fasta_file_compressed_or_not(filename):
    # Tokens 1 and 3 of the cleaned header form the key — presumably the
    # sequence accession and the gene name; verify against the header layout.
    name = '/'.join(str_punctuation_strip(name)[1:4:2])
    # 'RNA' also works for DNA input because codons() turns T into U first.
    protein = get_translation(sequence, 'RNA')
    # Store the protein under its key; the original rebound prot_lst to the
    # protein string on every iteration and never used the key.
    prot_lst[name].append(protein)
    print(protein)


MTRLPLLKRPRRNRKSAAIRSMIRETNMVSSDLIWPIFLKEGSGIREEIPSMPGVYRWSLDTISRELERLCLIGLKAVILFPVIEDQKKDQFGAYASHPYNIVCRGIQEIKKSFPQLCVISDIALDPFTTSGHDGIFYNNEVLNDESVRVYGDIATLHAEMGADIVAPSDMMDGRVRHIREKMDQMGFVNTGILSYSAKYASYLYGPFRDALSSHPQSGDKRQYQMDPANVREALLECRLDEEEGADMVMIKPAGFYLDVIMKAQECTHLPVVAYQVSGEYSMIMAASLHGWLSKEGAISESLLAIKRAGATAIISYATPWVLEWLARDALPF
MKIIVSRGLDLSLKGAPKESGFCGKVDPAFVSVDLRPFAPLPLGVKVSPGDQITAGSPLAEYKSFPGVFITSSVDGEVIEIRRGSKRALLDIVIKKKPGVSQTKFSYDLHALSQKELLEVFKKEGLFTLFKQRPFNIPALPTQSPRDVFINLADNRPFTPSVEKHLSLFSSKEDGYYIFVVGVQAIAKLFGLKPHIVSTDRLSLPTQDLISVAHLHTIAGPYPSGSPSTHIHHIARIRNDRDIVFTISFQEVLSIGHLFLKGFFLGQQVVALAGSALPPSQRKYLITAKGASFKDLLPQEIFSSNDVSLISGDPLTGRLCNKEENPCLGMRDHTITILPNPKTREMFSFLRLGWNKLTVTRTYLSGFFKRKRVFMDMNTNLHGEKRPIIDSEIYEKVSAIAVPVAPLIKALETQNFEEACRLGLLEVSPEDFALPTFIDPSKTEMFAIVKEALIRYAKENVLTPL
MKNNSAQKIIDSIKQILSIYKIDFDPSFGAVLTDDNDLDYQMLIEKTQEKIQELDKRSQEILQQTGMTREQMEVFANNPDNFSPEEWRALETIRSSCNEYKKETEELIKEVTQDISHTSGKSPTPKAKSSSPKKSKKKNWIPL
MDYLEKLQSLMENHPSDFFSLWEEYCFNDVVSGDELIVLLEKIKSSSIAPAFGKIA

MLILALSCGENTCLCAADSPKAKVDASIGNGASFSPFTGEIKGNRVRLRLAPHTDSSIIKELSKGDCLAVLGESKDYYVVAAPEGVRGYVFRTFVLDNVIEGEKVNVRLEPSTSAPILARLSKGTVVKTLGAAQGKWVEIALPKQCVFYVAKNFVKNVGALELYNQKEGQKKIALDLLNSAMSFADAELQKKVEDIDLDAIYKKMNLAQAEEFKDVPGLQPLVQKALERVQEAFLAKSLEKGSHKTVESYKPVETQAQLQPQRQVIEEKNVSVVPEAPVLSQVEEPKSVLTSSSEVEPLQDVGPIKGSLLSHYIRKKGFVKTSPVVEGRESFERSLFEVWVNLQPEEIRNGLTMESFYRDEQKKKRVLTGELEVYPHIVKNNPGDYLLKNGEDVVAFVYATSIDLSKWLGKRVVLECVSRPNNHFAFPAYIVLSIKEGA
MYIFSSSFFFMQHAHNADTFPAWDCLQRNYLHRDRITSFCSYVPVLSTVVGIRTLCNMRRLEKELTERTGGFLCKNDPTIPCSWFPCSIIRKEWPKARATAIQEVLGIKALVSLGALLLKVFRAVKTFFYKNFSFALAIPSNRDSIGSPGYPPSAIPFPKHHFVETPNSF
MDNEDKVSFPAKEEKVLTFWKEQNIFQKTLENRDGSPTFSFYDGPPFATGLPHYGHLLAGTIKDVVCRYATMDGHYVPRRFGWDCHGVPVEYEVEKSLGLTEPGAIDRFGIANFNEECRKIVFRYVDEWKYFVDRIGRWVDFSATWKTMDLSFMESVWWVFHSLYKQGLVYEGTKVVPFSTKLGTPLSNFEAGQNYKEVDDPSVVAKFALQDDQGILLAWTTTPWTLVSNMALAVHPGLTYVRIQDKESGEEYILGQESLARWFPDRESYKWIGQLSGESLVGRRYCPLFPYFQDQQDRGAFRVIPADFIEESEGTGVVHMAPAFGEADFFACQEHNVPLVCPVDNQGCFTSEVTDFVGEYIKFADKGIARRLKNENKLFYQGTIRHRY

MVFSSYKLPDLPYDYDALEPVISAEIMHLHHQKHHQGYINNLNEALKSLDVASATQDLTGLIAINPALRFNGGGHINHSLFWEMLAPQNKGGGTPPRHELLKLIEKFWGSFDNFLKNFISSSAAVQGSGWGWLAFCPKKQELMIQTTANQDPLEATTGMIPLLGVDVWEHAYYLQYKNARLDYLKNFPSIINWDYIESRFVEMSK
MEIPKKKIEEMFDPQTAKNILSWLQDNACDNTLILDLLDKDPEKLKERFGDILTFGTAGLRSLMGVGTNRLNVFTVRRATQSLAQVLKQRYPDEDISVVVGYDTRHHSLEFGQETAKVLAGNGILTYLFQVPEPLALVSYSVREFQAKAGVMITASHNPPAYNGYKVYMSTGGQVLPPMDQEIVKEFQMIDQVLSVDTLDHPCIRLIQEEIEADYEKALHQLQLCREDNRQHGSLLRMSYSPLHGTGVSMVPRILKDWGFSSVCLVEKQMIPDGDFPTIVLPNPEDPEALVLGIQQMLDQKDDLFIATDPDADRIGIVSLEKDGPYRFNGNQIACLLAAHILSRESQKAPLGVEDKVVKSIVTTELLTAITESYGGSVVNVGAGFKYIGEKIELWRSGMERFIFGAEESYGYLYGSHVEDKDAMISAALIAETALQQKLRGYTLRDALLELYEIHGYYANLTESIDLPVDQPNQKQELLERWETQDPLYMSLPGRKLVAFENYKTGEGCDLVTGITYKLALPKMSMLCFYYEGSGRVIVRPSGTEPKIKLYFELKHHFSEFSKERTVRESREKESFEALENFVKETKMRLFRT
MRSVLHLEHKRYFQNHGNILFEKIAPVSDCKKLEAELKQFLKEVAVAKDRYLQRWRENVYRSLPGVHAIVKKARLDRLAAELVHRSKVALVKDLWMQAEEEIFFEDCDCAVVLCLSGEKAGWALFFTGEYPQGVLGWDPKASVIILGFSSAGFPN
MQRTVNIQAVESKLKFTFSQPRLLVTALTHPSYRNEFPSDGEDS

MAENKCSMQDLLDRLPYSFLKKNYLLPIDDLGDKIVLARHLKKTPLEALDEVRLIMQKPLTIVSKEETEIIHGLQKLYSDKEGKASEMLLSMQDGEAQESESDTTELLESQEDSAPVVRLLNLILKEAIEERASDIHFEPVEDLLRIRYRIDGVLHDRHAPPNHLRTALITRIKVLAKLDIAEHRLPQDGRIKLQLGGQEIDMRVSTVPVIHGERVVLRILDKRNVILDIRGLCMLPKMEASFRKAISVPEGILLVTGPTGSGKTTTLYSVLQHLSGPFTNIMTIEDPPEYKLPGIAQIAVKPKIGLTFSRGLRHLLRQDPDVLMVGEIRDQETAEIAIQAALTGHLVVTTLHTNDAVSAIPRLLDMGVESYLLSATMIGAVAQRLVRRICKHCKEICEADVQERALLKALGKDPYAPLYKGRGCSQCFRSGYKGRQGIYEFVDITTTLRSEIALGKPYHVLRGVAEQEGYRPLLEHGVELALAGETTLSEVLRVAKRSE
VKNVLRYGFIGAFCFGSLDIPVFSITVAEKLASIEGKTEAQAPLAHISSFNSELKEANALLKSLYDEALSLRSLGETSQEVWNDLRDRLISAKQRVRALEDLWSAEVSEKGGDPEDYALWNHPETTIYNLVSDYGDEQSIYLIPQNVGAMRITAMSKLVVPKEGFEECLSLLLARLGIGVRQVSPWIKELYLTSKEETGVVGIFGARQDLDVLPSTAHIAFVLSSKNLDARSDVQALRKFANSDTMLIDFIGGKIWLFGVVHEITELLKIYEFLQSDNIRQEHRIVSLSKIDPFEMLAILKAAFREDLAKEGEDSAGVGLKVVPLQNHGRSLFLSGALPIVQKAIDLIRELEEGIENPTDKTVFWYNVKHSDPQELAALLSQVHDIFSSGSGIAGSQDTSVSANKSGAASNGLAVQIDTSIGGTSKEGSTKYGSFIADSKTGTLIMVIEKEALPKIKMLLKKLDVPKKMVRIEVLLFERKLSSQRKSGLNLLRLGEEVC