In [1]:
import os
import sys
import multiprocessing
import warnings

import pandas as pd
import numpy as np

from Bio import SeqIO
from glob import glob
from functools import partial
sys.path.append('../../')
from hamp_pred.external.lbs.sequence import mmseqs2

In [2]:
# Directory with the input data, given relative to the notebook location
data_dir = '../../data/input'
# Sub-directory of `data_dir` that receives the unpacked AF2 model files
out_path = os.path.join(data_dir, 'af2_full')

# Get HAMP sequences

In [3]:
# Load the HAMP sequences (to be clustered below) from the alignment of
# https://pubmed.ncbi.nlm.nih.gov/20184894/
msa = list(SeqIO.parse(os.path.join(data_dir, 'hamp_msa.fasta'), 'fasta'))

# One row per alignment entry: the sequence with the gap characters ('-')
# stripped, and the FASTA record id.
msa_df = pd.DataFrame(
    {
        'sequence': [str(record.seq).replace('-', '') for record in msa],
        'id': [record.id for record in msa],
    }
)

In [4]:
# number of records read from the alignment file
len(msa)

6456

In [5]:
# Remove redundancy: cluster the ungapped sequences with MMseqs2 at
# 70% sequence identity and 70% coverage.
cl = mmseqs2.MMSeqsClusterer()
# presumably the returned frame carries a `clust_id` column (it is grouped on below) — verify against MMSeqsClusterer
msa_df_clustered = cl.cluster(msa_df, min_identity=0.7, coverage=0.7)

createdb /tmp/s_7r99v0 tmp/2277846130962167930/input --max-seq-len 65535 --dont-split-seq-by-len 1 --dbtype 0 --dont-shuffle 1 --id-offset 0 --compressed 0 -v 3 

Converting sequences
[
Time for merging files: 0h 0m 0s 8ms
Time for merging files: 0h 0m 0s 6ms
Time for merging files: 0h 0m 0s 0ms
Time for processing: 0h 0m 0s 42ms
kmermatcher tmp/2277846130962167930/input tmp/2277846130962167930/clu_tmp/9752797917747901279/linclust/13067973857597857346/pref --sub-mat blosum62.out --alph-size 13 --min-seq-id 0.7 --kmer-per-seq 21 --adjust-kmer-len 0 --mask 0 --mask-lower-case 0 --cov-mode 0 -k 0 -c 0.7 --max-seq-len 65535 --hash-shift 5 --split-memory-limit 0 --include-only-extendable 0 --skip-n-repeat-kmer 0 --threads 20 --compressed 0 -v 3 

Database size: 6456 type: Aminoacid
Reduced amino acid alphabet: (A S T) (C) (D B N) (E Q Z) (F Y) (G) (H) (I V) (K R) (L J M) (P) (W) (X) 

Estimated memory consumption 2 MB
Generate k-mers list for 1 split
Sort kmer 0h 0m 0s 6ms
Sort by rep. sequ

Index statistics
Entries:          83252
DB size:          488 MB
Avg k-mer size:   0.001301
Top 10 k-mers
    DEGLTF	82
    EIQAFN	62
    DEGLNF	49
    EIDAFN	35
    RREGAR	32
    EIQSFN	21
    PVSDGQ	20
    IGMREV	19
    DEGMTV	16
    PVSDGR	14
Time for index table init: 0h 0m 0s 523ms
k-mer similarity threshold: 154
	k-mers per position = 0.475694, k-mer match probability: 0.000000
k-mer match probability: 0.000000

Starting prefiltering scores calculation (step 1 of 1)
Query db start  1 to 5561
Target db start  1 to 5561

0.473296 k-mers per position
27 DB matches per sequence
0 overflows
6 sequences passed prefiltering per query sequence
3 median result list length
0 sequences with 0 size result lists

Time for prefiltering scores calculation: 0h 0m 0s 9ms
Time for merging files: 0h 0m 0s 7ms
Time for processing: 0h 0m 0s 771ms
align tmp/2277846130962167930/clu_tmp/9752797917747901279/input_step_redundancy tmp/2277846130962167930/clu_tmp/9752797917747901279/input_step_redundancy t

In [6]:
# keep only the first row of every cluster, i.e. one representative per `clust_id`
msa_df_clustered = msa_df_clustered.groupby('clust_id').head(1)
# number of representatives that remain
len(msa_df_clustered)

5388

In [7]:
# persist the non-redundant set; it is read back from this file as `df_clustered` further below
msa_df_clustered.to_pickle(os.path.join(data_dir, 'hamp_master.p'))

# Unpack AF2 models

In [8]:
def extract_models(models_dir, out_path, df_clustered):
    """Unpack the model files of every AF2 result bundle found under ``models_dir``.

    Bundles are located with the pattern ``<models_dir>/*/*result.zip``. The
    sequence id is the second ``_``-separated field of the bundle file name
    (``.result.zip`` removed). All archive members whose path matches
    ``*relaxed*`` are written, without their archive sub-directories, to
    ``<out_path>/<seq_id>/``.

    Files that already exist are never overwritten, and a target directory that
    already contains files is treated as unpacked and skipped. An empty
    directory (e.g. left behind by an interrupted run) is filled again.

    Parameters
    ----------
    models_dir : str
        Directory whose sub-directories hold the ``*result.zip`` bundles.
    out_path : str
        Root directory for the unpacked models (created on demand).
    df_clustered : object with an ``index`` attribute
        Every sequence id parsed from a bundle name must be in ``df_clustered.index``.

    Raises
    ------
    AssertionError
        If a bundle refers to a sequence id that is not in ``df_clustered.index``.
    zipfile.BadZipFile
        If a bundle is not a readable zip archive.
    """
    # Function-scope imports keep this definition self-contained within the notebook.
    import fnmatch
    import shutil
    import zipfile

    for out_file in glob(os.path.join(models_dir, '*', '*result.zip')):

        bundle_name = os.path.basename(out_file)
        seq_id = int(bundle_name.replace('.result.zip', '').split("_")[1])
        assert seq_id in df_clustered.index, \
            f'{bundle_name}: sequence id {seq_id} is not in df_clustered.index'

        tmp = os.path.join(out_path, str(seq_id))

        # Already unpacked in a previous run: nothing to do.
        if os.path.isdir(tmp) and os.listdir(tmp):
            continue

        # No shell involved, so paths with spaces or shell metacharacters are safe.
        os.makedirs(tmp, exist_ok=True)

        with zipfile.ZipFile(out_file) as archive:
            for member in archive.infolist():
                if member.is_dir():
                    continue
                # Same selection as `unzip ... "*relaxed*"`: the pattern is matched
                # case-sensitively against the member's full path inside the archive.
                if not fnmatch.fnmatchcase(member.filename, '*relaxed*'):
                    continue
                # Equivalent of `unzip -j`: drop the directory part of the member.
                target = os.path.join(tmp, os.path.basename(member.filename))
                # Equivalent of `unzip -n`: existing files are not overwritten.
                if os.path.exists(target):
                    continue
                with archive.open(member) as src, open(target, 'wb') as dst:
                    shutil.copyfileobj(src, dst)

In [9]:
#
# af2 model files are available upon request
#
# Reload the non-redundant sequence set written by the clustering step.
df_clustered = pd.read_pickle(os.path.join(data_dir, 'hamp_master.p'))

# Aligned sequences: unlike the clustering input, the gap characters are kept,
# so alignment columns can be addressed by position.
msa = list(SeqIO.parse(os.path.join(data_dir, 'hamp_msa.fasta'), 'fasta'))
hampid2alnseq = pd.DataFrame(
    {
        'sequence': [str(record.seq) for record in msa],
        'id': [record.id for record in msa],
    }
)

In [10]:
# sizes of the clustered set, the alignment, and the aligned-sequence table
len(df_clustered), len(msa), len(hampid2alnseq)

(5388, 6456, 6456)

In [11]:
# unzip af2 models bundles
# NOTE(review): absolute, machine-specific path — point it at the local directory
# that holds the AF2 result bundles before running this cell.
models_dir = '/home/nfs/jludwiczak/calc/hamp_olek/hamp_final/out'

extract_models(models_dir, out_path, df_clustered)

# Analyse AF2 models


In [12]:
### Measure with SamCC
### Get AF2 scores
### Store PDB file link

In [13]:
# To measure af2 hamp models use samCC turbo https://academic.oup.com/bioinformatics/article/36/22-23/5368/6039120
sys.path.append('../../hamp_pred/')
from utils.measure import measure_one_HAMP, get_ref_crick
from utils.tools import diffangle

In [14]:
def run_multiprocess(func, tasks, n_cores, tasks_per_core=1):
    """Apply ``func`` to every item of ``tasks`` in a worker pool and yield the results.

    This is a generator: nothing is executed until it is iterated. Results are
    yielded in the order of ``tasks``.

    Parameters
    ----------
    func : callable
        Function of one argument; it is sent to the worker processes, so it
        must be picklable.
    tasks : iterable
        Inputs for ``func``.
    n_cores : int
        Number of worker processes.
    tasks_per_core : int, optional
        Passed to the pool as ``maxtasksperchild``: a worker is replaced by a
        fresh process after this many tasks.

    Yields
    ------
    The return value of ``func`` for each task.
    """
    tasks = list(tasks)
    # Nothing to do; also avoids creating a pool when the caller derives
    # n_cores from len(tasks) and the task list is empty.
    if not tasks:
        return

    pool = multiprocessing.Pool(processes=n_cores, maxtasksperchild=tasks_per_core)
    try:
        # map() blocks until every task is finished and returns an ordered list.
        results = pool.map(func, tasks)
    finally:
        # Always release the workers, even when a task raised.
        pool.close()
        pool.join()

    yield from results

In [15]:
# Reference Crick angles, keyed by heptad position
crangles = dict(zip(
    'abcdefg',
    (19.5, 122.35, -134.78, -31.92, 70.92, 173.78, -83.35),
))

# Three-letter residue names -> one-letter amino-acid codes
AA_3_to_1 = dict([
    ('CYS', 'C'), ('ASP', 'D'), ('SER', 'S'), ('GLN', 'Q'), ('LYS', 'K'),
    ('ILE', 'I'), ('PRO', 'P'), ('THR', 'T'), ('PHE', 'F'), ('ASN', 'N'),
    ('GLY', 'G'), ('HIS', 'H'), ('LEU', 'L'), ('ARG', 'R'), ('TRP', 'W'),
    ('ALA', 'A'), ('VAL', 'V'), ('GLU', 'E'), ('TYR', 'Y'), ('MET', 'M'),
])

In [16]:
# Column windows of helix 1 and helix 2 in the MSA; they are used as
# Python slices [start:stop] on the aligned sequences.
# (alternative start columns noted by the author: 4 for helix 1, 84 for helix 2)
h1_msa_start, h1_msa_stop = 5, 18
h2_msa_start, h2_msa_stop = 85, 98

# Heptad position used to pick the reference Crick angle
# (alternative noted by the author: 'g')
start_hep = 'a'

In [17]:
# ignore every UserWarning from here on (the filter is process-wide)
warnings.filterwarnings(action='ignore', category=UserWarning)
# json is used below to read the per-model AF2 score files
import json

In [19]:

# Measure the rank-1 AF2 model of every clustered HAMP sequence and collect one
# record (helix geometry + AF2 confidence scores) per model in `data`.
data=[]
# `tmp_idx` counts the sequences that passed the filters below; the model
# directories are looked up by this counter, not by the DataFrame index.
tmp_idx=-1
for idx in df_clustered.index:

    # NOTE(review): this test sees the counter of the *previous* accepted
    # sequence (it is incremented further down), so model directory 5000 is
    # still searched in the first location — confirm this matches the layout
    # on disk. Both paths are absolute and machine-specific.
    if tmp_idx >= 5000:
        af2_path = '/home/nfs/sdunin/tmp/hamp'
    else:
        af2_path = '/home/nfs/rmadaj/hamp/HAMPpred/clustering/af2_structures'

    row = df_clustered.loc[idx]
    # record ids have the form "<numeric id>|<group>|..."
    group = row['id'].split("|")[1]
    true_id = int(row['id'].split("|")[0])

    # aligned sequence (looked up by the same index label as the clustered row)
    alnseq = hampid2alnseq.loc[idx].sequence

    # cut helix1 and helix2 out of the alignment and drop the gaps
    h1 = alnseq[h1_msa_start:h1_msa_stop].replace('-', '')
    h2 = alnseq[h2_msa_start:h2_msa_stop].replace('-', '')

    # skip sequences whose helices differ in length (gaps inside a helix window)
    if len(h1) != len(h2):
        continue

    # skip sequences that contain unknown residues
    seq = row.sequence
    if seq.find('X')>-1:
        continue

    # locate both helices in the ungapped sequence
    h1_start = seq.find(h1); assert h1_start > -1
    h2_start = seq.find(h2); assert h2_start > -1

    # 1-based start, and stop = start + helix length
    # presumably residue numbers as used in the model files — TODO confirm against measure_one_HAMP
    a1_start, a1_stop = h1_start+1, h1_start+len(h1)+1
    a2_start, a2_stop = h2_start+1, h2_start+len(h2)+1
    chain1, chain2 = 'A', 'B'

    kwargs = {'a1_start':a1_start,
              'a1_stop':a1_stop,
              'a2_start':a2_start,
              'a2_stop':a2_stop,
              'chain1':chain1,
              'chain2':chain2}

    mapfunc = partial(measure_one_HAMP, **kwargs)

    # get 1 rank af2 model; example file name:
    # hamp_315_A_group_44_unrelaxed_rank_001_alphafold2_multimer_v3_model_5_seed_000.pdb
    tmp_idx +=1

    pdb_files = glob(os.path.join(af2_path, str(tmp_idx), '*_unrelaxed*rank_001*.pdb'))

    # fail with a readable message instead of a bare IndexError on pdb_files[0]
    if not pdb_files:
        raise FileNotFoundError(
            f'no rank_001 unrelaxed model for sequence {idx} '
            f'in {os.path.join(af2_path, str(tmp_idx))}'
        )

    # analyse only rank 1 model
    pdb_files = [pdb_files[0]]

    # analyse selected models (one worker process per model file)
    for job, pdb_file in zip(run_multiprocess(mapfunc, pdb_files, len(pdb_files)), pdb_files):

        dir_path, filename = os.path.split(pdb_file)
        _, last_dir = os.path.split(dir_path)
        assert tmp_idx == int(last_dir)
        # path relative to the models root: "<model dir>/<file name>"
        pdb_file_simple = os.path.join(last_dir, filename)

        # parse scores 'max_pae', 'pae', 'plddt', 'ptm' from the JSON file
        # that sits next to the model ("_unrelaxed_" -> "_scores_", ".pdb" -> ".json")
        json_file = pdb_file[:-4].replace('_unrelaxed_', '_scores_') + ".json"
        with open(json_file) as scores_fh:
            scores = json.load(scores_fh)

        # measure with SamCC
        bundle_df, n_crick_mut, c_crick_mut = job

        # even rows -> n_* values, odd rows -> c_* values
        # presumably the rows alternate between the N- and C-terminal helix — TODO confirm against measure_one_HAMP
        n_shift = bundle_df['shift'][0::2].mean()
        c_shift = bundle_df['shift'][1::2].mean()

        n_radius = bundle_df['radius'][0::2].mean()
        c_radius = bundle_df['radius'][1::2].mean()

        n_A = bundle_df['A'][0::2].mean()
        c_A = bundle_df['A'][1::2].mean()

        nn_P = bundle_df['P'][0::2].mean()
        cc_P = bundle_df['P'][1::2].mean()

        # drop the two outermost values at each end
        n_crick_mut = n_crick_mut[2:-2]
        c_crick_mut = c_crick_mut[2:-2]

        # assume canonical bundle periodicity for calculating reference Crick angles
        n_P = c_P = 3.5

        c_phi = n_phi = crangles[start_hep]

        n_crick_ref = get_ref_crick(n_P, n_phi)[:len(n_crick_mut)]
        c_crick_ref = get_ref_crick(c_P, c_phi)[:len(c_crick_mut)]

        # deviation from the reference; consecutive value pairs are averaged
        n_crick_diff = diffangle(n_crick_mut, n_crick_ref)
        n_crick_diff = (n_crick_diff[0::2] + n_crick_diff[1::2])/2

        c_crick_diff = diffangle(c_crick_mut, c_crick_ref)
        c_crick_diff = (c_crick_diff[0::2] + c_crick_diff[1::2])/2

        n_crick_diff = np.mean(n_crick_diff)
        c_crick_diff = np.mean(c_crick_diff)

        # rotation asymmetry
        crick_diff = diffangle(n_crick_diff, c_crick_diff)

        # consistency check: the residues that were measured must be exactly
        # the helices cut from the alignment
        seq1 = bundle_df.res_name[0::4].tolist()
        seq2 = bundle_df.res_name[1::4].tolist()
        seq1 = "".join([AA_3_to_1[res] for res in seq1])
        seq2 = "".join([AA_3_to_1[res] for res in seq2])

        assert seq1 == h1 and seq2 == h2

        # add record
        print(h1, h2, idx, tmp_idx, group)

        data.append(
                (true_id, group, n_crick_diff, c_crick_diff, crick_diff,
                 h1, h2, n_crick_mut, c_crick_mut, seq, pdb_file_simple,
                 scores['max_pae'], np.mean(scores['plddt']), scores['ptm'], np.mean(scores['pae']),
                 n_shift, c_shift, n_radius, c_radius, n_A, c_A, nn_P, cc_P
                )
            )
        
# Assemble the per-model records into a table; the column order mirrors the
# order of the values in each record tuple.
data_df = pd.DataFrame(
    data,
    columns=[
        'idx', 'group', 'n_rot', 'c_rot', 'rot',
        'n_seq', 'c_seq', 'n_crick_mut', 'c_crick_mut', 'full_seq', 'pdb_file',
        'max_pae', 'plddt', 'ptm', 'pae_mean',
        'n_shift', 'c_shift', 'n_radius', 'c_radius', 'n_A', 'c_A', 'n_P', 'c_P',
    ],
)
# every sequence id may occur only once before it becomes the index
assert data_df['idx'].is_unique
data_df = data_df.set_index('idx')
# number of measured models
len(data_df)

PLKELVQGVQRIA EIGELITSFNLMA 0 0 A_group_13
PIRELVEGVRSIA ELGELIASFNDMA 1 1 A_group_13
PIKELLDGVKNIA ELGELIVNFNEMA 2 2 A_group_13
PLSELRKGIRRVA ELGDLILQFNDMG 14 3 A_group_13
PVKELLRGVRAVA ELGELVNGFNDMA 15 4 A_group_13
PIKELLAGIRNIS QIGELIFSFNNMA 16 5 A_group_13
PQKKLLLGIQNIA QLSTLIISFNEMA 17 6 A_group_13
PLDILSNGVMQIS EFAPVCADFNEMA 18 7 A_group_14
PLELLSYGAEQIK EFGQVCGDFDEMR 19 8 A_group_14
PLELLSYGAGQIE EFAKVCRDFDKMR 20 9 A_group_14
PLQKLSRGTKEIK EFGEVCRDFDEMR 21 10 A_group_14
PLVKLKQAAEKMG EVGELCESFENMR 22 11 A_group_14
PVGKMREATKSIK ELGQLCGDLEDMR 23 12 A_group_14
PLQQLLEATQKIA EMGALCEAFDLMR 24 13 A_group_14
PIRKLSEAAGKIS ELGQLSNTFEDMR 25 14 A_group_14
PLDVLKRATKEMK EIGQLCEDFEEMR 26 15 A_group_14
PLSQLKQASEQIK EIGELFVAFEEMR 27 16 A_group_14
PLKELQYASNEIR EFGEAIKSFEGMR 28 17 A_group_14
PLVKLRKATQNIK EFSELCRDFEEMR 29 18 A_group_14
PLAKLQAAARNIK EIGQLCQDFEEMR 30 19 A_group_14
PLNKLQEATKKIR EIGQLCQDFEEMR 31 20 A_group_14
PLNQLKIGTEKIK EIGELCDAFDSMR 32 21 A_group_14
PITALKEGAQRIK EIGQLNRSF

RIRKIENIAQRVS EIGGLAAAFNRMK 198 136 A_group_26
PLKKMEQLAQRIS EMGMLAASLNRMK 200 137 A_group_26
PLKKMEQLAQRIS EMGMLAASLNRMK 200 137 A_group_26
PIKKMAQLAQKIS EVGMLAASLNRMK 201 138 A_group_26
PITNMSKWSRGVS EIGVLAASVNRLK 202 139 A_group_26
PIVRMAQTAEQVS EIGMLGKAFNRMK 203 140 A_group_26
PLQKMAKIAESVS EIGALAKAFNRMK 204 141 A_group_26
PITHLTRVAEQVS EVGKLAEAFTRMQ 205 142 A_group_26
PITHLTRVAEQVS EVGKLAEAFTRMQ 205 142 A_group_26
SIKKLSTAAYAIS ELVILAESFNKMT 208 143 A_group_27
PITQLSIAAAKIA ELEILAQSFNQMS 209 144 A_group_27
PVLRLTEASREIA ELRTLAHSFNRMT 210 145 A_group_27
PVLRLTEASREIA ELRTLAHSFNRMT 210 145 A_group_27
PLLRVARAAEAIS EIDQLARSFNRMS 211 146 A_group_27
PLLRVARAAEAIS EIDQLARSFNRMS 211 146 A_group_27
PLMSLSQASQEIA ELRILATVFNQMS 212 147 A_group_27
PISRLSQASQAIA ELRILSQSFKTMN 213 148 A_group_27
PISRLSQASQAIA ELRILSQSFKTMN 213 148 A_group_27
PILRMNLASQAIA ELNILTQSFNYMA 215 149 A_group_27
PILRMNLASQAIA ELNILTQSFNYMA 215 149 A_group_27
PILQLNRAAEAIA ELEVLAHSFNQMA 216 150 A_group_27
PILQLNRAAEAIA

PIRAVQEGASRLA EVEVLADEFNRMA 322 238 A_group_44
PIQALNSGAQRLG ELEQLAARFNSMA 323 239 A_group_44
PIQALNSGAQRLG ELEQLAARFNSMA 323 239 A_group_44
PITALRDGAHKLG ELEDLAGQFNRMA 324 240 A_group_44
PITALRDGAHKLG ELEDLAGQFNRMA 324 240 A_group_44
PLGRLSAASREIA ELGELTGSFNAMS 326 241 A_group_44
PLGRLSAASREIA ELGELTGSFNAMS 326 241 A_group_44
PIRRLEVGTQKIS EIGDLTAAFNEMS 327 242 A_group_44
PIRRLEVGTQKIS EIGDLTAAFNEMS 327 242 A_group_44
PIRRLQAGAREIS EIGELTAAFNDMS 330 243 A_group_44
PIKQLRAGACRIG EFGELAVAFNYMS 332 244 A_group_44
PIKQLRAGACRIG EFGELAVAFNYMS 332 244 A_group_44
PIQGLTRQAAAVA EIGRLSSAFNDMT 333 245 A_group_5
PIQGLTRQAAAVA EIGRLSSAFNDMT 333 245 A_group_5
PIKELTKHARAVA EIGQLSQAFNYMT 334 246 A_group_5
PISDMRRQAIEMA EIGQLALSFNNLS 335 247 A_group_5
PISDMRRQAIEMA EIGQLALSFNNLS 335 247 A_group_5
PLSDMRKQAIELA EIGQLATTFNYLT 337 248 A_group_5
PLSDMRKQAIELA EIGQLATTFNYLT 337 248 A_group_5
PIEEMKRQTARIA ELGQLAQAVNNLS 338 249 A_group_5
PIGEMREQAIRIA ELGQLADTFNQLA 339 250 A_group_5
PIVEMRRQADFLG EIGQLSAT

PLSELAAGTRAVA ELGMLTTLFNRMT 454 346 A_group_6
PLSELAAGTRAVA ELGMLTTLFNRMT 454 346 A_group_6
PLLILAEGTQAVA ELGVLTQSFNRMT 456 347 A_group_6
PLLILAEGTQAVA ELGVLTQSFNRMT 456 347 A_group_6
PLRTLARGTRAVA ELGVLIQSFNRMT 457 348 A_group_6
PLRTLARGTRAVA ELGVLIQSFNRMT 457 348 A_group_6
PLLRLAAGTQAVG EVGQLTRSFNAMT 459 349 A_group_6
PLLRLAAGTQAVG EVGQLTRSFNAMT 459 349 A_group_6
PLLMLLKGTQAVA ELGMLTRQFNVMT 462 350 A_group_6
PLLMLLKGTQAVA ELGMLTRQFNVMT 462 350 A_group_6
PLLLLAEGTKAVA ELGTLTQSFNMMT 466 351 A_group_6
PLLLLAEGVKQVA ELGGLTRSFADMT 467 352 A_group_6
PVNDLVAAARRVA EIGTLGSAFNRMT 472 353 A_group_63
PLLDLFYATRKVQ EMSTLARAFNQMT 473 354 A_group_63
PIGRLASAAHTVR ELHALGHSFNAMT 474 355 A_group_63
PLAELVSAARTIG EIGLLARAFNRMT 475 356 A_group_63
PLTDLVAAARKVG EIGLLNRAFNRMT 476 357 A_group_63
PLTDLVAAARKVG EIGLLNRAFNRMT 476 357 A_group_63
PISHMVEAVRRIG EIGTLGFAFNRMA 477 358 A_group_63
PISHMVEAVRRIG EIGTLGFAFNRMA 477 358 A_group_63
PIRTLISAADEVS DLANLGETFNKMT 479 359 A_group_63
PIRLLISAADDVA DIGQLSKTFNY

PIVDAAGAVSKIG ELATLGANINDMA 619 449 A_mgroup_100
PILEASSAVQKLA ELASLGSSINRMV 620 450 A_mgroup_100
PILEASSAVQKLA ELASLGSSINRMV 620 450 A_mgroup_100
PITDSADAVDAIG ELAQLGGNINQMA 621 451 A_mgroup_100
PITDSADAVDAIG ELAQLGGNINQMA 621 451 A_mgroup_100
PIQRLATAAEQVR EIGDLSRSLKAMT 622 452 A_mgroup_101
PIRRLALAAERVR EIGELSLALKEMT 623 453 A_mgroup_101
PIRRLALAAERVR EIGELSLALKEMT 623 453 A_mgroup_101
PLRKLSAAADRVR EVGHLSTSIREMT 624 454 A_mgroup_101
PLRKLSAAADRVR EVGHLSTSIREMT 624 454 A_mgroup_101
PIRHLAEAAERVR EIGHLSGALRDMT 625 455 A_mgroup_101
PIRHLAEAAERVR EIGHLSGALRDMT 625 455 A_mgroup_101
PLLRLAASAHVIR EIGEVARALQDSA 627 456 A_mgroup_101
PLLRLAASAHVIR EIGEVARALQDSA 627 456 A_mgroup_101
PVRRLAAAADRVR EIGHLARSFREMT 628 457 A_mgroup_101
PVRRLAAAADRVR EIGHLARSFREMT 628 457 A_mgroup_101
PLLRLAGAADRVR ELGDLSAALREMT 630 458 A_mgroup_101
PLLRLAGAADRVR ELGDLSAALREMT 630 458 A_mgroup_101
PVLRMASAADRVR ELGDLSRALEEMT 631 459 A_mgroup_101
PVRRLAQAAETVR EIGELSGALREMT 632 460 A_mgroup_101
PLRRLARAAVRVR EIGTLA

PLRRLAEGTRRVA EMGQLVDSFNRMT 780 568 A_mgroup_114
PLRRLAEGTRRVA EMGQLVDSFNRMT 780 568 A_mgroup_114
PLEKIVDATKRVA EMGMLIDSFNQMT 781 569 A_mgroup_114
PLEKIVDATKRVA EMGMLIDSFNQMT 781 569 A_mgroup_114
PIRELQEAINRVR EIGVLQAGFNEMM 782 570 A_mgroup_30
PIRELQEAINRVR EIGVLQAGFNEMM 782 570 A_mgroup_30
PIRSVSLGMAKVA ELGALQTGFNRMA 783 571 A_mgroup_30
PIRSVSLGMAKVA ELGALQTGFNRMA 783 571 A_mgroup_30
PVRQLSQAIDRVQ EIGLLQVGFNRMM 784 572 A_mgroup_30
PVRQLSQAIDRVQ EIGLLQVGFNRMM 784 572 A_mgroup_30
PLRQLRWALSEVQ ELGLLQAGFNDMV 785 573 A_mgroup_30
PLRQLRWALSEVQ ELGLLQAGFNDMV 785 573 A_mgroup_30
PVRVVRAALRRVE ELGELQRGFNAMV 786 574 A_mgroup_30
PVKVMSSKASEIS EIAQLGRSFNRMH 789 575 A_mgroup_41
PVKVMSSKASEIS EIAQLGRSFNRMH 789 575 A_mgroup_41
PVRKLSAMADRVS EIGGLTASFNRMY 790 576 A_mgroup_41
PVRKLSAMADRVS EIGGLTASFNRMY 790 576 A_mgroup_41
PVTRMAEQANLIS EIASMASSFNRMH 791 577 A_mgroup_41
PITRLAALANQVS EVAELAASFARMR 792 578 A_mgroup_41
PIQRVAANADATS EISVLAAAFNRMH 793 579 A_mgroup_41
PIQRVAANADATS EISVLAAAFNRMH 793 579 

PIGQMVEAADRLA EIGKLAESFKRMI 904 668 A_mgroup_53
PIRQLMKASEKIA EAGKLAESFKRMS 905 669 A_mgroup_53
NIGQVIHAADALA ETGMLAEAFGKMI 906 670 A_mgroup_53
NIGQVIHAADALA ETGMLAEAFGKMI 906 670 A_mgroup_53
PVNRLVIAAEKIA EIGMLASAFKRMS 907 671 A_mgroup_53
PVNRLVIAAEKIA EIGMLASAFKRMS 907 671 A_mgroup_53
SIKKLMSAADELA ETGKLAHSFNKMV 908 672 A_mgroup_53
SIKKLMSAADELA ETGKLAHSFNKMV 908 672 A_mgroup_53
PITEIMYSANKIA EIGILENSFKKMA 909 673 A_mgroup_53
PITEIMYSANKIA EIGILENSFKKMA 909 673 A_mgroup_53
PVNKIIRIADKIA EIGLLALSFRRMA 910 674 A_mgroup_53
PMRKLEDAALKAS EIESLAAAFNSLL 911 675 A_mgroup_54
PIQKLEEGAKKVA ELRSLALAYNEMI 912 676 A_mgroup_54
PLHRLEQAAIKAA EIRSLGIAFNEML 913 677 A_mgroup_54
PLHRLEQAAIKAA EIRSLGIAFNEML 913 677 A_mgroup_54
PLQHLEEAATKAA EIRSLAVAFNHML 914 678 A_mgroup_54
PLQHLEEAATKAA EIRSLAVAFNHML 914 678 A_mgroup_54
PLRKLEEAARKAA EIKSLSVAFNMML 915 679 A_mgroup_54
PLRKLEEAARKAA EIKSLSVAFNMML 915 679 A_mgroup_54
PLQKLEKTALKAA EIRSLGAAFNHML 916 680 A_mgroup_54
PLSALERSARKAA EIRSLGLAYNEML 917 681 A_mg

PLGAMTAALKKLS ELGAMAHAAQVFK 1017 771 B_group_130
PLGAMTAALKKLS ELGAMAHAAQVFK 1017 771 B_group_130
PIFRITRSMEAIA ELGLMARALAVFR 1018 772 B_group_130
PIFRITRSMEAIA ELGLMARALAVFR 1018 772 B_group_130
PIGQLKVVMEAFA EIGEMARTVEVFK 1019 773 B_group_130
PIGQLKVVMEAFA EIGEMARTVEVFK 1019 773 B_group_130
PVQAITRAMSALA EIGEMAQAVDVFK 1021 774 B_group_130
PVQAITRAMSALA EIGEMAQAVDVFK 1021 774 B_group_130
PVVALERRMRGLA EIGKMASALAIFR 1022 775 B_group_130
PVVALERRMRGLA EIGKMASALAIFR 1022 775 B_group_130
PIVDITSSMSRLA EIGAMAAAVQVFK 1023 776 B_group_130
GITSMTAAMGRLA EVGEMAKAVQIFK 1024 777 B_group_130
GITSMTAAMGRLA EVGEMAKAVQIFK 1024 777 B_group_130
PIIQMSDSMRELA EIGLMACTVEVFK 1025 778 B_group_130
PIIQMSDSMRELA EIGLMACTVEVFK 1025 778 B_group_130
PIEEMTSTMGVLA EVGAMADALEVFR 1026 779 B_group_130
PIEEMTSTMGVLA EVGAMADALEVFR 1026 779 B_group_130
PISQMAGVMRKLA EVGEMAGAVEAFK 1027 780 B_group_130
PISQMAGVMRKLA EVGEMAGAVEAFK 1027 780 B_group_130
PIVAMTDAMTKLA EVGDMAAAVQIFK 1028 781 B_group_130
PIVAMTDAMTKLA EVGDMA

PIVSITTIMRRLA EIGDMAGAVEVFK 1132 867 B_group_130
PIVSITTIMRRLA EIGDMAGAVEVFK 1132 867 B_group_130
PIGRMTAAMGRLA EIGLMAAAVQVFK 1133 868 B_group_130
PIGRMTAAMGRLA EIGLMAAAVQVFK 1133 868 B_group_130
PLTGMTDAMSHLA EIGAMASAVQVFR 1134 869 B_group_130
PLTGMTDAMSHLA EIGAMASAVQVFR 1134 869 B_group_130
ILRAMTAAMTQLA EIGGMARAVEIFK 1135 870 B_group_130
ILRAMTAAMTQLA EIGGMARAVEIFK 1135 870 B_group_130
PMAEMTEITARMA EIGKLAHAVEVFR 1136 871 B_group_130
PMAEMTEITARMA EIGKLAHAVEVFR 1136 871 B_group_130
PVKALTGTMGTLA EIGSMARAVEVFR 1138 872 B_group_130
PVKALTGTMGTLA EIGSMARAVEVFR 1138 872 B_group_130
PMTRLNAAMGKMA EIGDIAKTIAVIR 1139 873 B_group_130
PMTRLNAAMGKMA EIGDIAKTIAVIR 1139 873 B_group_130
AVGGMTGAMERLA EIGAMAATVQVFK 1140 874 B_group_130
AVGGMTGAMERLA EIGAMAATVQVFK 1140 874 B_group_130
PLDGLGIAMTRLA EVGRMAAAVQVFK 1141 875 B_group_130
PLDGLGIAMTRLA EVGRMAAAVQVFK 1141 875 B_group_130
PLSAMTGAMTELS EIGAMAKAMEIFK 1142 876 B_group_130
PLSAMTGAMTELS EIGAMAKAMEIFK 1142 876 B_group_130
PIRALEDSMRQIS EIGSMA

PIERIAKRMQELA EIGEMARALEVFR 1263 965 B_group_130
PIERIAKRMQELA EIGEMARALEVFR 1263 965 B_group_130
PIGQLAGVMERLA EFGVMAHTIQVFR 1265 966 B_group_130
PIGQLAGVMERLA EFGVMAHTIQVFR 1265 966 B_group_130
PISGMTLAMTRLA EIGEMAKAMEVFK 1266 967 B_group_130
PISGMTLAMTRLA EIGEMAKAMEVFK 1266 967 B_group_130
PIPKLTSVAANLA EIGALARALEVLR 1267 968 B_group_130
PIPKLTSVAANLA EIGALARALEVLR 1267 968 B_group_130
PIRTLASGIESIS EVGEIGRAVEILR 1268 969 B_group_130
PIRTLASGIESIS EVGEIGRAVEILR 1268 969 B_group_130
PIIAMTAAMGRLA EVGGMAAAMQTFK 1270 970 B_group_130
PIIAMTAAMGRLA EVGGMAAAMQTFK 1270 970 B_group_130
PVRAMTDVMGTLS EIGEMARSVVVFK 1271 971 B_group_130
PVRAMTDVMGTLS EIGEMARSVVVFK 1271 971 B_group_130
PLARLAGVTREVA EIGTMANALDRFR 1272 972 B_group_130
PLARLAGVTREVA EIGTMANALDRFR 1272 972 B_group_130
NLTAIAGAMTRLA ELGDLARAFNVFA 1274 973 B_group_130
PLERMTTVMKSLA EVGAMAQAVLVFR 1275 974 B_group_130
PLERMTTVMKSLA EVGAMAQAVLVFR 1275 974 B_group_130
PILSLCEAMRGLA EVGQMIDAVGVFR 1279 975 B_group_130
PILSLCEAMRGLA EVGQMI

PIEQICLRMESIA EIGLIAQTLLRMK 1395 1063 B_group_130
PIEQICLRMESIA EIGLIAQTLLRMK 1395 1063 B_group_130
PLHQIAVGMDRVA EIGDIGKSLLSLQ 1396 1064 B_group_130
PIGRICQDMESVA EIGQIGKTLVSMQ 1397 1065 B_group_130
PIGRICQDMESVA EIGQIGKTLVSMQ 1397 1065 B_group_130
PIDGLCARMSEIS EFGHMGQILRTMQ 1398 1066 B_group_130
PIDGLCARMSEIS EFGHMGQILRTMQ 1398 1066 B_group_130
PLTRLVAVLQRMA EIGQVGKAVADIR 1400 1067 B_group_130
PLTRLVAVLQRMA EIGQVGKAVADIR 1400 1067 B_group_130
PMRKLRSTMKQAS EIGQLASSFNKMM 1401 1068 C_group_131
PIYHLVSASDTIS ELGTLAQSFNKMR 1402 1069 C_group_131
PIYHLVSASDTIS ELGTLAQSFNKMR 1402 1069 C_group_131
KIGKVMTVMSKAA EIGKLALSFNLMI 1403 1070 C_group_131
KIGKVMTVMSKAA EIGKLALSFNLMI 1403 1070 C_group_131
PLKKVIEVTNSVA QLAEMTKNFNKMV 1404 1071 C_group_131
PINKLSIGANLIS ELGQLGKAFNKMS 1405 1072 C_group_131
PINKLSIGANLIS ELGQLGKAFNKMS 1405 1072 C_group_131
PLNAMVFVSHRLA ELGDLGKAFNSMI 1406 1073 C_group_131
PLNAMVFVSHRLA ELGDLGKAFNSMI 1406 1073 C_group_131
PIVLLEQSVAKIA EVGKLGTAFNQMS 1407 1074 C_group_131


PLQSAVQLAQSVA EVGQLLQALKGMN 1526 1170 C_group_131
PLRSAVDVSNRLS ETGQLLAAMHNMV 1527 1171 C_group_131
PLRSAVDVSNRLS ETGQLLAAMHNMV 1527 1171 C_group_131
PIREALGVAEKVA ETGQLLSALNNMN 1528 1172 C_group_131
PLQKGIAFAQEIA EVGKMADALREMI 1529 1173 C_group_131
PLQKGIAFAQEIA EVGKMADALREMI 1529 1173 C_group_131
PLRKVVEFAQAIA ETGQLAEAVNTMS 1530 1174 C_group_131
PLNEAVALADSIA ELGLLLKSLNAMA 1531 1175 C_group_131
PLKEAVTVASRAA ELGQLMSALKEMV 1532 1176 C_group_131
PLKEAVTVASRAA ELGQLMSALKEMV 1532 1176 C_group_131
PLREAVEVANRLA ETGQLLAAMGNMV 1533 1177 C_group_131
PMNQAVKVAQTVA ETGRLLQALKDMN 1534 1178 C_group_131
PIREAVSLATRVA EFGQLLSALGTMV 1536 1179 C_group_131
PLHRAVSVAESVA EMGQLLRALDAMA 1537 1180 C_group_131
PLRRAVALAEAVA ETGQLLAALRRMN 1538 1181 C_group_131
PINACVDAANKIA ETGILQAAMNKMA 1539 1182 C_group_131
PLQQAVSLAQAVA ETGQLLQALQHMN 1540 1183 C_group_131
PLRETLAVANRIA EPGLLMQAVGTMN 1541 1184 C_group_131
PLRQTVAVAERIA ETGHLLRSLADMR 1542 1185 C_group_131
PLKQALRQAERIA ELGQLQGSMRAMT 1543 1186 C_group_131


PMLELTATMHDIA EVGQLANHMNTFI 1660 1287 D_group_129
PMLELTATMHDIA EVGQLANHMNTFI 1660 1287 D_group_129
PMQRLNDAIKNIA EFGELSHSFNLFI 1661 1288 D_group_129
PMQRLNDAIKNIA EFGELSHSFNLFI 1661 1288 D_group_129
PMNTMQQALEQMA ETGVLARTFNRFL 1662 1289 D_group_129
PLRNALNAMERIA EMADLGQAFNDFA 1663 1290 D_group_129
PIKRATAMMREIA EIGQWAMSFNTFV 1664 1291 D_group_129
PLKEAITAMKDIA EVAELAQAFNLFV 1665 1292 D_group_129
PLKKISAGMENIA ELGDLARNFNIFT 1666 1293 D_group_129
PLKKISAGMENIA ELGDLARNFNIFT 1666 1293 D_group_129
PLQETVNAMANIA EVTQLAHHFNAFT 1667 1294 D_group_129
PLKQLNIQLETIS EIGHVAESFNAML 1669 1295 D_group_129
PLKQLNIQLETIS EIGHVAESFNAML 1669 1295 D_group_129
PLRTLADSMHEIA EVGLLAGRFNAFV 1670 1296 D_group_129
PLRTLADSMHEIA EVGLLAGRFNAFV 1670 1296 D_group_129
PIKDVVATMEDIA ELGRLARAFNQFI 1671 1297 D_group_129
PLAKALAAMDEIA ELADLGAAFNRFG 1672 1298 D_group_129
PVGRAARMLEEIA EVGALASAFNRFV 1673 1299 D_group_129
PVGRAARMLEEIA EVGALASAFNRFV 1673 1299 D_group_129
GLARIRDAMESIG EVAQIARSFNAFV 1674 1300 D_group_129


EPGQLGTVAQRVA PRGSVLASLGAMQ 1809 1400 E_501
EPSVAQHAAAQIA DSSSLMSSLDAMR 1810 1401 E_501
EPSVAQHAAAQIA DSSSLMSSLDAMR 1810 1401 E_501
EPTAIADIANKLA AETGVYAAMHNMV 1811 1402 E_501
EPTAIADIANKLA AETGVYAAMHNMV 1811 1402 E_501
EPQAATEIMQRVA PAGSLLHALGTMV 1812 1403 E_501
EPQAATEIMQRVA PAGSLLHALGTMV 1812 1403 E_501
EPREVAHIVGEIA PKESTLEAILRMQ 1813 1404 E_501
EPREVAHIVGEIA PKESTLEAILRMQ 1813 1404 E_501
EPHEVARITGEVA PPGSLSDSVQQMC 1814 1405 E_501
DPGYLAQVAGEIA REGGVYHVMRGMV 1815 1406 E_501
DPGYLAQVAGEIA REGGVYHVMRGMV 1815 1406 E_501
EPRYAANVVKAIA GPDSLLGAMNDMR 1816 1407 E_501
EPRYAANVVKAIA GPDSLLGAMNDMR 1816 1407 E_501
EPRYVQQVVEQLA DELSLLAAIRQMR 1817 1408 E_501
EPSTVATITRTIA DTSSVAAAVVAMQ 1818 1409 E_501
EPSTVATITRTIA DTSSVAAAVVAMQ 1818 1409 E_501
EPAQLGEVAKRVA PGGSVLASLGEMQ 1820 1410 E_501
EPAQLGEVAKRVA PGGSVLASLGEMQ 1820 1410 E_501
EPAHATQIAARIA APGSLLVALGDMK 1821 1411 E_501
EPAHATQIAARIA APGSLLVALGDMK 1821 1411 E_501
DPRYAVEVVGHIA DSDSVLAHTNRMR 1823 1412 E_501
EPQQVAELMRQVA QSSLAASIMQTIQ 1824

VKKKAMACVAEFG KKAFINDTIEAVR 1962 1506 E_group_3
VKKLAMGVVAEFG KKAFINDTVERAR 1964 1507 E_group_3
VKKQAMACVAEFG KKAFINEVVDQIR 1965 1508 E_group_3
VKKQAMACVAEFG KKAFINEVVDQIR 1965 1508 E_group_3
SLRAMASAAEQIA PDDVLGTAFARMS 1967 1509 E_group_4
SLRAMASAAEQIA PDDVLGTAFARMS 1967 1509 E_group_4
LQKDVIGSMQKIA EIGPALRVTADTI 1990 1510 E_group_4
LQNDVIGAMNKIA EIAPALNKVVKNV 1991 1511 E_group_4
SSMEKSHLAEAIA ETGDLVRAVCNLE 2043 1512 E_group_4
SLRATAEVADAIA ENDTLGLALKRMV 2044 1513 E_group_4
SLRATAEVADAIA ENDTLGLALKRMV 2044 1513 E_group_4
KIRQATTFIKGIE EKDTLSEALLNMR 2045 1514 E_group_4
AEHKVAGVADALA DEDRLFASLATMV 2046 1515 E_group_4
AEHKVAGVADALA DEDRLFASLATMV 2046 1515 E_group_4
VERNIAELAVTMA EEDKLLRSLMDMV 2047 1516 E_group_4
SFRKLEVFAEEVG EEGDVGQSLAKMQ 2048 1517 E_group_4
SFRKLEVFAEEVG EEGDVGQSLAKMQ 2048 1517 E_group_4
PVNEAMRLAGCYA EFVAYRDALNTIG 2049 1518 E_group_5
PVNEAMRLAGCYA EFVAYRDALNTIG 2049 1518 E_group_5
PLNETLRVAERYA DLLVLKQKMNQIG 2050 1519 E_group_5
PLNETLRVAERYA DLLVLKQKMNQIG 2050 1519 E_

AVSETASMLDAMA AFDKLKRDANTTA 2259 1604 F_group_126
TLSRFSSLLRAIA VFALMRDDANSTV 2260 1605 F_group_126
SIAEVSRLLSALA VFATMRNDANTTA 2261 1606 F_group_126
SISEVIQVMGAMA AFADMQRYVNTTM 2262 1607 F_group_126
SISEVIQVMGAMA AFADMQRYVNTTM 2262 1607 F_group_126
ALNDVASVLSSVA VFGQLKSDVNQTA 2263 1608 F_group_126
IISELKQVFAAIA SLEQLKHDVNTTV 2264 1609 F_group_126
IISELKQVFAAIA SLEQLKHDVNTTV 2264 1609 F_group_126
MVEELQQVFAALA SLEQLKTDVNATV 2265 1610 F_group_126
MVEELQQVFAALA SLEQLKTDVNATV 2265 1610 F_group_126
VISDVVNMFDGLA QFAKLQTDANATV 2266 1611 F_group_126
SLEHISALLSALS VFAQMRDDANATA 2272 1612 F_group_126
SLEHISALLSALS VFAQMRDDANATA 2272 1612 F_group_126
MIEELMRVFAAMS ALEQLKNDVNATM 2273 1613 F_group_126
MIEELMRVFAAMS ALEQLKNDVNATM 2273 1613 F_group_126
SLNDVMRIATALS IFGQTKDGLNSTI 2274 1614 F_group_126
SLNDVMRIATALS IFGQTKDGLNSTI 2274 1614 F_group_126
NLAELSTLLRAIA VFAVMRDDANSTV 2275 1615 F_group_126
NLAELSTLLRAIA VFAVMRDDANSTV 2275 1615 F_group_126
GVSETQRVIAALA AFGELKRNVNATM 2277 1616 F_group_126


AINSLGAGLRALA SMEMVRKDFNEAL 2381 1697 F_group_301
AINSLGAGLRALA SMEMVRKDFNEAL 2381 1697 F_group_301
AVNQIAQALARLA QFASVRDDFNDAV 2382 1698 F_group_301
AVNQIAQALARLA QFASVRDDFNDAV 2382 1698 F_group_301
AIAALGQGLSHLA AYEPLRADFNTAA 2383 1699 F_group_301
AIAALGQGLSHLA AYEPLRADFNTAA 2383 1699 F_group_301
AVKAVGEALARLA EFHGLKADFEQAT 2384 1700 F_group_301
AVKAVGEALARLA EFHGLKADFEQAT 2384 1700 F_group_301
VVATIGEALEKLA RYAALRDNFNEAL 2385 1701 F_group_301
VVATIGEALEKLA RYAALRDNFNEAL 2385 1701 F_group_301
VVRTLSKALMNLA EHKKLRYSINDAM 2387 1702 F_group_301
AVEELGAALERLS EFERLRQDFNKSI 2388 1703 F_group_301
VMTALANSLEKLA EFDKLRQDFNNMV 2389 1704 F_group_301
VMTALANSLEKLA EFDKLRQDFNNMV 2389 1704 F_group_301
FVGVVEVGFERLS EFEPIRAKFNASV 2390 1705 F_group_301
FVGVVEVGFERLS EFEPIRAKFNASV 2390 1705 F_group_301
AMETLGAALERLA EYAKLKDDFNTAV 2391 1706 F_group_301
AMETLGAALERLA EYAKLKDDFNTAV 2391 1706 F_group_301
VIAVLGNAMASLR RYDRLRQDFNDTA 2392 1707 F_group_301
VIAVLGNAMASLR RYDRLRQDFNDTA 2392 1707 F_group_301


DTAQLLAALRAFR VAGELAEAFNDVV 2491 1794 F_group_400
NFNELLQALKSAR GWGEIAQVFNEWM 2492 1795 F_group_400
ELRQLLAGLTAVR LLGEIATVFNGMI 2493 1796 F_group_400
ALNRLLAALVSMR VMSEIAAVFNEVA 2494 1797 F_group_400
DSRQLLRVLTAVR SAGKVADTLNEII 2495 1798 F_group_400
PLQPLLAALRSVH VMEEIASAFNAVV 2496 1799 F_group_400
PLQPLLAALRSVH VMEEIASAFNAVV 2496 1799 F_group_400
GRHELVSALRKLR IATLFNEVVGLNR 2498 1800 F_group_400
DLRPLLAAMTAAR LVAELSTAFNQIM 2499 1801 F_group_400
NINPLLVAMKAAR GLGEVAIVFNQMI 2502 1802 F_group_400
DRRTLLMSLTGLK LDGKIADTFNDVV 2503 1803 F_group_400
DTSTLLKALIAFR MAGKIADTVNEIF 2504 1804 F_group_400
ALKESLEVANGVK QLVELRGVLNGML 2505 1805 G_502
FLSDVKTIATEMK GLLQLKGIFNEVI 2506 1806 G_502
LVSQSLQVIERAK QLNRLRDSVNDLL 2507 1807 G_502
LVSQSLQVIERAK QLNRLRDSVNDLL 2507 1807 G_502
AIQNSIEVTNYVE DLQALRNTINSII 2508 1808 G_502
CVKDAILMVQEIK QLLELKALLNEMV 2509 1809 G_502
CVKDAILMVQEIK QLLELKALLNEMV 2509 1809 G_502
AVKESVTTVGIVE QLIELKSVLNSLL 2510 1810 G_502
TVQETLMVVNRIK QLLKLKELLNEML 2514 1811 G_502
AVEQ

PLDGWCEALGRRH ELQPLIATLNALL 2828 1936 H_group_500
PLERLRQALARRA ELQPLVDALNQLL 2829 1937 H_group_500
PLERLRQALARRA ELQPLVDALNQLL 2829 1937 H_group_500
PVRTLTGEIDRRA DLQPLVRAMNTML 2830 1938 H_group_500
PVRTLTGEIDRRA DLQPLVRAMNTML 2830 1938 H_group_500
PVRRLSEQLQTRP ELQPLVDAMNHVM 2831 1939 H_group_500
PVRRLSEQLQTRP ELQPLVDAMNHVM 2831 1939 H_group_500
PLHHLARAIGRRS ELQPMLAALNDLL 2832 1940 H_group_500
PLHHLARAIGRRS ELQPMLAALNDLL 2832 1940 H_group_500
PLIRLRNEVRNRT ELRPLVTALNQYM 2833 1941 H_group_500
PLRSLSDALQRRS ELVPMVNTLNSLL 2834 1942 H_group_500
PLRSLSDALQRRS ELVPMVNTLNSLL 2834 1942 H_group_500
PLKHLSRLLAKRK ELTPVVSTLNTMF 2835 1943 H_group_500
PVNRFAIHLRARA ELIPIKQAINKLL 2836 1944 H_group_500
PLTDLADSLNRQT EIRPLTGALNALL 2837 1945 H_group_500
PLRKLASQLDQMA ELQEVTKAVNRLN 2838 1946 H_group_500
PVRKLIDEISQAS EFDSLKQSINGLL 2839 1947 H_group_500
PLERLRQEVEGRS EVGPLVDAVNLHM 2840 1948 H_group_500
PLERLRQEVEGRS EVGPLVDAVNLHM 2840 1948 H_group_500
RLDGLVHDLAGRG EVAPLVEAMNGLI 2841 1949 H_group_500


PMSQIKQAVSQLK ELDELASGINRMA 2976 2073 mgroup_104
PLRHVLRISEGLS EMGTMSRALDRMT 2981 2074 mgroup_111
PLRHVLRISEGLS EMGTMSRALDRMT 2981 2074 mgroup_111
PINETKDFTAVIA EIGDLASNFDSMQ 2982 2075 mgroup_111
PLGVSVAIGESMS EIGTLSRVFAQLL 2983 2076 mgroup_111
PLGVSVAIGESMS EIGTLSRVFAQLL 2983 2076 mgroup_111
PIKATIDVGNHLA EVGDIARSFNSMK 2984 2077 mgroup_111
PIIQSANYSREIA ERGILARSIQSIN 2985 2078 mgroup_111
PIIATIEHSKKIA EVGSLAIAFQTIT 2986 2079 mgroup_111
PIIATIEHSKKIA EVGSLAIAFQTIT 2986 2079 mgroup_111
PIMKVTESAERIA EIGALAKGIQVIT 2987 2080 mgroup_111
PIIKIKEKAERLA EIGILSTALQNVI 2988 2081 mgroup_111
RVLELEAAATQIA SVGRVAAAFNSMA 2989 2082 mgroup_113
RVSGLEMAATRIA SVGRLAAAFNGMA 2990 2083 mgroup_113
RVERLERAATRIA FLGRLGMAFNGMA 2992 2084 mgroup_113
RLRSLEAAATRIA SVGRLGAAFNHMA 2993 2085 mgroup_113
RLRGLEGAATHIA SVGRLASSFNAMA 2994 2086 mgroup_113
PIHKLQRVVRQFG DIAELGQTLNTML 2995 2087 mgroup_22
PIQKLQRVARQFA DIAELGHSFNQML 2996 2088 mgroup_22
PVIKLQAVMRELS EIAHLSKDVNTTA 2997 2089 mgroup_22
PIQKLQLVIRKLA EIAQLSQDI

PIKEMTKLLEKMS EIDMLAEAFERMR 3171 2236 BACKGROUND
PVVELTRQIKKVE EINSLTTAFENMV 3172 2237 BACKGROUND
PIQNLTTVAEELS EIGTLARAISRLS 3173 2238 BACKGROUND
ELEPLVMATGRVA AANSLLVSIERMQ 3175 2239 BACKGROUND
SFNALMQGVKTIG EPGQAAMAINDLA 3176 2240 BACKGROUND
AIQQLTAVVDVLA ELGELAQSINLAA 3177 2241 BACKGROUND
PLRAMIRAVEDTA EMGILAKAFAQML 3178 2242 BACKGROUND
PIQHLMKVGQAWK EMGQLAIILNDMA 3179 2243 BACKGROUND
LLRAVTDGLSSFA ELGDLVERHNRLG 3180 2244 BACKGROUND
GIRPLVSGVHNLA IFGDLAQSINSAS 3181 2245 BACKGROUND
QLNALKDVMLHVE EVGQMAKAFNAMQ 3182 2246 BACKGROUND
SSDELASVAAAVS ELGAIGGSFNETA 3183 2247 BACKGROUND
PIYEITEVTKKIT EIGKLAKEFNVMA 3184 2248 BACKGROUND
PLEQITRVARRIT EIGQLATAINAMA 3185 2249 BACKGROUND
PMRTLIDQARRIG EIGELGREMDAMC 3186 2250 BACKGROUND
PLDVLLSSIDKVS ELAAIARSFASMS 3187 2251 BACKGROUND
PLDRLTVGIKALE EIGRLANAFNHMA 3188 2252 BACKGROUND
TLNHLTSAVNRMA EVGALAKAFNLMN 3189 2253 BACKGROUND
PLNRLTAGVKALG EIGNLATAFNTMS 3190 2254 BACKGROUND
RLQRVVYAARAWS ELGQLSRELNLMA 3191 2255 BACKGROUND
NLSELKTASKRIL EIGQTT

DVAQLSDCMRAVG EFAFIKRGFNALL 3350 2404 BACKGROUND
PLHQLTDAMVNIS EFSKIINSFNIFV 3351 2405 BACKGROUND
PLVQTCSELLDIQ EIGEIVNQINLFI 3352 2406 BACKGROUND
PLNRLRDMAKDLT EVGITSNYVNRFI 3353 2407 BACKGROUND
PVLDLLKHAKELA EIGEACTYINQFI 3354 2408 BACKGROUND
RVSGFTQKLDIMA ELGEIARHFDGLA 3355 2409 BACKGROUND
QLAAMAQAADALG ELAHAASRINRFL 3356 2410 BACKGROUND
DLAKTTDSLEGLA ELAKATRSINKFI 3357 2411 BACKGROUND
NIRELSDRIQVIA ELGELAEGVNMLI 3358 2412 BACKGROUND
KVQRTTNVLQDIA ELDVLAGHYNTFA 3359 2413 BACKGROUND
PIDRMASVLQRAD ELAVIGRTINKVL 3360 2414 BACKGROUND
SLNNMSQTMENVS EVGNTARAFNSLM 3361 2415 BACKGROUND
VLREMKEAIDSIA ELKELAKSLNSFL 3362 2416 BACKGROUND
SWIDLIHSLKDLA EAGAIAFLMNRYL 3363 2417 BACKGROUND
PVSEMAKVAEEIA EIGKLLEGFRAIS 3365 2418 BACKGROUND
PLQMVFDTLAEVA EMGMLATEVNVMA 3366 2419 BACKGROUND
PINQLVRETGAVA ELTQLARAFNKMV 3367 2420 BACKGROUND
RLNNVNTKLAYIT EISAMAHSVNAFI 3368 2421 BACKGROUND
PIRRLNTAVKEVA EVAVLSSDFNQTV 3369 2422 BACKGROUND
PLQRISDNMTTVA EISKLGQSFNTML 3370 2423 BACKGROUND
ALEKIDRDIAAIA AAGRIS

PITKLTAATRQLA ELGQLATSFNIMT 3527 2572 BACKGROUND
PLEMLAEASHRVG EMGSLFQDFNHMV 3529 2573 BACKGROUND
PLRKLVPAIERLG EYGRVAVAFNGMS 3530 2574 BACKGROUND
PLTDMKEAAIQMA EIGQLAESFNTLS 3531 2575 BACKGROUND
PLIEIERAVGKIA EIGRLARSVDAMR 3533 2576 BACKGROUND
HLKNFVIKLSKLK EVDELVEKFNELM 3534 2577 BACKGROUND
QIDAATRKAAAFD ELHPLVDAINAGL 3535 2578 BACKGROUND
PIDALSSQVVKLS EIRPLIAQFNALL 3536 2579 BACKGROUND
PLAIFSGRIETIT ELAGLAGSFNKML 3537 2580 BACKGROUND
PLRKFSMETSKID EMQSSIQVFNLML 3538 2581 BACKGROUND
PLRTMTATTLRIS ELTDLGDTIDGLL 3539 2582 BACKGROUND
PLRNLGVTLTQIN ELAPLVNAFNEML 3540 2583 BACKGROUND
PVEHMIQELRNIQ EMNNLATVINSML 3541 2584 BACKGROUND
PIKIFTKELASIN ELEELALTCNDML 3542 2585 BACKGROUND
PVARLRRSAAQLV ELSELAHTLNELI 3543 2586 BACKGROUND
PVDRMRRQAAEMA EIAALARTMNDLL 3544 2587 BACKGROUND
QVERIRGQVAGIS EIQALAETMNMML 3545 2588 BACKGROUND
PIEQITMTAQSIV ELQLLTVTINELL 3546 2589 BACKGROUND
PVKEVIESVKLIS EIEELVSTFNHLL 3547 2590 BACKGROUND
PFEDVIAKVNTIT EIKELITTFNYLL 3548 2591 BACKGROUND
PVQRVASAAESVG EVGRLV

VLRHLARTARRFG ELAELTRAFNAMA 3706 2740 BACKGROUND
PLARLVEAVKQMA ELQALETGFNAMA 3707 2741 BACKGROUND
PANAILEEANEIA EIGRLGLAFNRMA 3708 2742 BACKGROUND
GVDRVTCAARKIE EIDNLVDAFNNMT 3709 2743 BACKGROUND
PVRRLSEAASAVA EYARLSRAVNQMR 3710 2744 BACKGROUND
PIMDLIEAAGKVS EVALLGRAFNRMT 3711 2745 BACKGROUND
PIVELANASRQVA EVAELAEAFNRMT 3712 2746 BACKGROUND
PIKKLVIATDKVK EIGTLYAAFNRMI 3713 2747 BACKGROUND
PIKRLVKAADSVA EVGQLTEAITKMV 3714 2748 BACKGROUND
PLEAIARTAERIA EIGTLSRAFNRMT 3715 2749 BACKGROUND
HLRKIVQAVEQFR ELAVLGNTFNHMA 3716 2750 BACKGROUND
RVSDLVRAVRRLA EVAGLARAFNAMV 3717 2751 BACKGROUND
PIGELIAGAKRVS EFNSLINSFNTMM 3718 2752 BACKGROUND
RLQLLEDASRRVA ELGSLAQTLDDMA 3719 2753 BACKGROUND
PITAILEAADEVR ELALLVNGFNQMT 3720 2754 BACKGROUND
PIRRIVTAMRKIK EFELMNKTFNSMV 3721 2755 BACKGROUND
PIDTLVTGAQAIA EFTRLIHTFNAMA 3722 2756 BACKGROUND
PLTNLQIAAERVT QIAQLSQAFNAML 3723 2757 BACKGROUND
PINSLYNASEEIM EINLLIRAFNRMI 3724 2758 BACKGROUND
RIARLAAATRPVA EVADLGRAFDRML 3725 2759 BACKGROUND
PIHRLIKASQEVS EIAILQ

PIGRLIASLERLE EVGRLAGALESFR 3882 2908 BACKGROUND
PLRRLAKVADELA EIGDLAHGLERVT 3883 2909 BACKGROUND
PLVRVAQAISQIS ELGTMARATDALR 3884 2910 BACKGROUND
PLSQLTSAIRRLA EIGELAASVHVFR 3885 2911 BACKGROUND
PLVGVSRIVDEVS ELGDLGKSVDSLR 3886 2912 BACKGROUND
PLTRIECAMTSVA EIGKISRRLDSFR 3887 2913 BACKGROUND
PVRKLTYALNDMS EIGSMQLAFEKLR 3888 2914 BACKGROUND
PIRNIGETSALLA EIGEMARALESLR 3889 2915 BACKGROUND
PIKKVNMLVENLS EIGNMAKSVERLR 3890 2916 BACKGROUND
PLGDVATAINRLA EIGLIARHIEALK 3891 2917 BACKGROUND
PLTGLTRSVDKLS EIGAIARALDGFR 3892 2918 BACKGROUND
RIGDLLQGVRRIA EIGDIARALNGFR 3893 2919 BACKGROUND
PATRVRDVVEKLV EIGALARSIDVLR 3894 2920 BACKGROUND
PLRRASETLRRLA EIGDLWQSTKQLL 3895 2921 BACKGROUND
PLITLTKATEALA EIGQLTRGFDQMR 3896 2922 BACKGROUND
PLRDLVAGVHAIR ELGQLAEAVESLR 3898 2923 BACKGROUND
SLTQLAAAMERIR EVGKIARGVADFS 3899 2924 BACKGROUND
PISALAETMKKIS EIGIVSNVLFHFQ 3900 2925 BACKGROUND
PLKTVGETIDVIA EMGAIARTIDNLK 3901 2926 BACKGROUND
PLFQTRQAMLRLA ELGDLSQALRVFR 3902 2927 BACKGROUND
PIRGLQKGVNAIA ETGDLA

PLSDLRGVIRRVQ EVSDTARAFNMML 4055 3075 BACKGROUND
PLTQLREDMKKVQ ELGDLVDSFNELL 4056 3076 BACKGROUND
NIATVVKGMLKLE ELSQLGKSYNKTL 4057 3077 BACKGROUND
PLTDLQATMKEVK EIMSLHNSYNIMM 4058 3078 BACKGROUND
PIQKLRDSMKKVQ EIGSLTKSFDVMT 4059 3079 BACKGROUND
PIKQLTRQMKMAQ EFGLMSRQFNRMM 4060 3080 BACKGROUND
PLTNLSELTQRAA EIGALSISFNNMI 4061 3081 BACKGROUND
PIKNLCISTKLVG EIAMLTTSFNTMI 4062 3082 BACKGROUND
PIIKLTRVAKLVR EVGILAGTFNEMI 4063 3083 BACKGROUND
PIIRLTQSMKAVE EIGTLERRFNSML 4064 3084 BACKGROUND
PITELVPLMQAAG EFGKLGRSFNLMI 4065 3085 BACKGROUND
PIKQLTANVWRVE EIGLLNRQFQRMI 4066 3086 BACKGROUND
PVVKLTGFMKQVA ELGQLSDACNQFL 4067 3087 BACKGROUND
DLERVTKTFAVAE EIGLLSHSFNNMA 4068 3088 BACKGROUND
PVQILYRMMEEVE EFGRLGKNFNQML 4069 3089 BACKGROUND
PIRNLKNGMRQTE EIGGLIHSYNLMV 4070 3090 BACKGROUND
RLLRLSKHITKVG EIGQLSRQFNSMV 4071 3091 BACKGROUND
PIRRLQHVMKSVE EIGELARGFNKMV 4072 3092 BACKGROUND
PVSLLDKAMAKVK ELGRLTESFNQMT 4073 3093 BACKGROUND
PLRRLRDSMSLVE EIGSLSKSFNAMT 4074 3094 BACKGROUND
PINKLKSCMEIAS EIGVLA

PLRELERSLQKIA EMASLTRSINDTL 4227 3243 BACKGROUND
HLVTIAQYFSRYD ELDQLTASFATMA 4228 3244 BACKGROUND
PLKILTRVSKQIS EPGQLARAFQFML 4229 3245 BACKGROUND
YLKEFTRVSKKVS ELGELSKNFNYMI 4230 3246 BACKGROUND
PLSKLEKVIQQFK ELGDFSKSFDAIR 4231 3247 BACKGROUND
PIKAISSVVSQVQ DITDLGLTFNSML 4232 3248 BACKGROUND
PMAHMQSVIRRIN EMGDLGTAFNKLL 4233 3249 BACKGROUND
PLREMKASAKEME ELTELAHSFNELA 4234 3250 BACKGROUND
PLKEMSLAVQQVA EIGDLAKSFNIMA 4235 3251 BACKGROUND
PLIDMKRASTVYA EIGELAETLNKMA 4236 3252 BACKGROUND
PLQELTAAAEGLA EIGRLATAFNRMA 4237 3253 BACKGROUND
PLKNLQTTAQKLG EVGQLAVTFNAMA 4238 3254 BACKGROUND
PLQELVDATQRFR ELGQLATSFNTMA 4239 3255 BACKGROUND
PLKTITKEIKNFA EIGTLAKTFNSMA 4240 3256 BACKGROUND
PLAKLVGLSRDIA EIGQLATEFDHMA 4241 3257 BACKGROUND
PIKNLEYTFNKAQ EIGQLGHSFNQMA 4242 3258 BACKGROUND
PLLEMSRVSVDMA EIGRLAENLNLLA 4243 3259 BACKGROUND
PLKAMNSTALEMA ELGQLGNSLDLLA 4244 3260 BACKGROUND
PIQTLSNVSRRLA EISQLAHSVNQLA 4245 3261 BACKGROUND
PVQQLNYVAGRLM ELGMYARTFNRMA 4246 3262 BACKGROUND
PLGIIQNTVNRVR EIGQLS

RMGTLLKAIRIVR ELALLADEFDQLT 4382 3387 BACKGROUND
PIGHFVNIINSAS ELGQIAKAYNHLL 4383 3388 BACKGROUND
PIGHFVNIINSAS ELGQIAKAYNHLL 4383 3388 BACKGROUND
PLQELYVKMRRFE EIKGLSRSYNRMI 4384 3389 BACKGROUND
PLQELYVKMRRFE EIKGLSRSYNRMI 4384 3389 BACKGROUND
PLAELTAASEALA ELGDLAAAFGRMR 4385 3390 BACKGROUND
PLAELTAASEALA ELGDLAAAFGRMR 4385 3390 BACKGROUND
KLDQILHGVLLYE ELAELTDRLNRMA 4386 3391 BACKGROUND
KLDQILHGVLLYE ELAELTDRLNRMA 4386 3391 BACKGROUND
PLGEILLGVKRLE MYNDLYYSLNNLS 4387 3392 BACKGROUND
PLGEILLGVKRLE MYNDLYYSLNNLS 4387 3392 BACKGROUND
KLKDIISTMRKVE EIDELAFHFQKMI 4388 3393 BACKGROUND
KLKDIISTMRKVE EIDELAFHFQKMI 4388 3393 BACKGROUND
PIDALADAVRDVE EIGALAMAFNSMT 4389 3394 BACKGROUND
PIDALADAVRDVE EIGALAMAFNSMT 4389 3394 BACKGROUND
NLRTITQVARRFK DVAVFAHTFNDMA 4390 3395 BACKGROUND
NLRTITQVARRFK DVAVFAHTFNDMA 4390 3395 BACKGROUND
PLASMVEVAKRVE EMAEMVVAFNRMI 4391 3396 BACKGROUND
PIYKLLDVIRKIK EFGEIATAFNGLM 4392 3397 BACKGROUND
PIYKLLDVIRKIK EFGEIATAFNGLM 4392 3397 BACKGROUND
PLQRIIAVIDRFK ELGRIA

PILKLHRRASEIE EIGEFSNAFDSMT 4477 3481 BACKGROUND
PLLHMTRATRNMP EVGDLARSFDQMA 4478 3482 BACKGROUND
PLLHMTRATRNMP EVGDLARSFDQMA 4478 3482 BACKGROUND
PVLKLSRGMKRLS EFGFLIQSFNKMA 4479 3483 BACKGROUND
PVLKLSRGMKRLS EFGFLIQSFNKMA 4479 3483 BACKGROUND
PILLITKAAIDIS ELGILAKSFNLMT 4480 3484 BACKGROUND
PIVEMERAVEKIS ELGQLAHSIDAMR 4481 3485 BACKGROUND
PIVEMERAVEKIS ELGQLAHSIDAMR 4481 3485 BACKGROUND
PITRLSKAMKEVS EVGESYQSFNVMT 4482 3486 BACKGROUND
PITRLSKAMKEVS EVGESYQSFNVMT 4482 3486 BACKGROUND
PIVQLSKAAKALS EFNVLANAFNQMS 4483 3487 BACKGROUND
PIVQLSKAAKALS EFNVLANAFNQMS 4483 3487 BACKGROUND
YFSVLSEGLGLIS ELGMLARNMNKMA 4484 3488 BACKGROUND
PISQLNHQIESYS EIGSLSRSFEQMS 4485 3489 BACKGROUND
PISQLNHQIESYS EIGSLSRSFEQMS 4485 3489 BACKGROUND
PILTLGKVAAALE EFGELAHVFLRMV 4486 3490 BACKGROUND
PILTLGKVAAALE EFGELAHVFLRMV 4486 3490 BACKGROUND
PIDKLTTMAKLIE EFGQFARLFIVME 4487 3491 BACKGROUND
PIDKLTTMAKLIE EFGQFARLFIVME 4487 3491 BACKGROUND
PILKINSATSYIP GIGSLARSFITMA 4488 3492 BACKGROUND
PILKINSATSYIP GIGSLA

RLSIIAKRAEDIS ELATLMVTVNRMT 4576 3573 BACKGROUND
RLSIIAKRAEDIS ELATLMVTVNRMT 4576 3573 BACKGROUND
PVSQMIVASRAIS EIGQLAESFNRMK 4577 3574 BACKGROUND
PVSQMIVASRAIS EIGQLAESFNRMK 4577 3574 BACKGROUND
PLRALKKQIDRFA EISQVSEAFYRAI 4578 3575 BACKGROUND
PLRALKKQIDRFA EISQVSEAFYRAI 4578 3575 BACKGROUND
PLKRLASYALRLG EIVELALCLNRMR 4579 3576 BACKGROUND
PLKRLASYALRLG EIVELALCLNRMR 4579 3576 BACKGROUND
PLRLIALYAEELG EMWELVRTLNLMR 4580 3577 BACKGROUND
PMKRLVRQAEQVA EVAQLSEALSGMA 4581 3578 BACKGROUND
PMKRLVRQAEQVA EVAQLSEALSGMA 4581 3578 BACKGROUND
PVEALTDAAQEVF EIGELAEAFNHVQ 4582 3579 BACKGROUND
RIELLVEKQKQFG PVSDLASSFNFMA 4583 3580 BACKGROUND
TMEGFQDVAAKLA EMSSISKGMNQVA 4584 3581 BACKGROUND
DLETLRLAAQRFG DIRTLAQHFNQMA 4585 3582 BACKGROUND
DLETLRLAAQRFG DIRTLAQHFNQMA 4585 3582 BACKGROUND
KIRQVRYALNRMK EMANLASSYNNMS 4586 3583 BACKGROUND
KIRQVRYALNRMK EMANLASSYNNMS 4586 3583 BACKGROUND
RLEALRDGFAALK EVAQLTAGFNEMA 4589 3584 BACKGROUND
SLQALEIGLLNFK QLDALVSLYNQAS 4590 3585 BACKGROUND
SLQALEIGLLNFK QLDALV

PLIKLTEQTDAIG EIGDLSIAVRSMA 4674 3667 BACKGROUND
 EIGDLSIAVRSMA 4674 3667 BACKGROUND
QIDGLRDGALRMA ELGRLAATFNSMQ 4675 3668 BACKGROUND
PITELAEQTEQIT EIGILATQINDLS 4676 3669 BACKGROUND
SIREIEQASIKMR EIAHTAQAFNDLI 4677 3670 BACKGROUND
SIREIEQASIKMR EIAHTAQAFNDLI 4677 3670 BACKGROUND
PIRGLTRFAQAMS ELRQLARALNHLT 4678 3671 BACKGROUND
PIRGLTRFAQAMS ELRQLARALNHLT 4678 3671 BACKGROUND
SLTRLVKQISTIS ETRQLGLAFNAMI 4679 3672 BACKGROUND
SLTRLVKQISTIS ETRQLGLAFNAMI 4679 3672 BACKGROUND
PIKEMSEYIDLIV EIRDLFDNVNWMV 4680 3673 BACKGROUND
PISEMVRHLDLLV EMGRMADGIRTLQ 4681 3674 BACKGROUND
PIDALIARTEEIA EIAVLSQSFLDLA 4682 3675 BACKGROUND
PIDALIARTEEIA EIAVLSQSFLDLA 4682 3675 BACKGROUND
PIKALTRRSLAIG EIHALSESMLGMS 4683 3676 BACKGROUND
PLLNMARHIRSMH EIGELGLAFNDLL 4684 3677 BACKGROUND
PLLNMARHIRSMH EIGELGLAFNDLL 4684 3677 BACKGROUND
RVRELTSAIQSMR EMGLLASAFNRMS 4685 3678 BACKGROUND
RVRELTSAIQSMR EMGLLASAFNRMS 4685 3678 BACKGROUND
PIKNVILKLDDFS ETKKLTDALNTSI 4686 3679 BACKGROUND
PVGVLTFEAERIA EVGRLGGALERMR 4687 

PLNDLRYHFSVIA CVGRLFPLLREMQ 4779 3765 BACKGROUND
PLNDLRYHFSVIA CVGRLFPLLREMQ 4779 3765 BACKGROUND
PLDRLAERARQIG EFGRIEFALQMLE 4780 3766 BACKGROUND
PLDRLAERARQIG EFGRIEFALQMLE 4780 3766 BACKGROUND
AVQSLSAAAAEIE EIGQLFASIGSMR 4781 3767 BACKGROUND
AVQSLSAAAAEIE EIGQLFASIGSMR 4781 3767 BACKGROUND
ALNDLEAKAAALE ELGRLFEAFASLR 4782 3768 BACKGROUND
ALNDLEAKAAALE ELGRLFEAFASLR 4782 3768 BACKGROUND
SLRRLSTQAEALA EVGDVQNAFYEMK 4783 3769 BACKGROUND
SLRRLSTQAEALA EVGDVQNAFYEMK 4783 3769 BACKGROUND
TLTDLHMQTVAVA EFGDMYDAIDNLR 4784 3770 BACKGROUND
TLTDLHMQTVAVA EFGDMYDAIDNLR 4784 3770 BACKGROUND
PIIQLSKQVRAVG EFGVLYKGIRKMV 4785 3771 BACKGROUND
PIIQLSKQVRAVG EFGVLYKGIRKMV 4785 3771 BACKGROUND
QMQPVLDAIGQIK EFDDCLSAIDDMR 4786 3772 BACKGROUND
QMQPVLDAIGQIK EFDDCLSAIDDMR 4786 3772 BACKGROUND
ALTDLRYVTSKLA EFGDVYRAVDDLR 4787 3773 BACKGROUND
ALTDLRYVTSKLA EFGDVYRAVDDLR 4787 3773 BACKGROUND
ALTDMSDRAEAIA EVGDLRRSFRDIQ 4788 3774 BACKGROUND
ALTDMSDRAEAIA EVGDLRRSFRDIQ 4788 3774 BACKGROUND
SLREITTQTAAIA EIGRVR

GLGALATGAKAYG ELSDLARQFTGMA 4876 3857 BACKGROUND
GLGALATGAKAYG ELSDLARQFTGMA 4876 3857 BACKGROUND
PLQELRRGIRSIT EFGELAGAFNEMA 4877 3858 BACKGROUND
PLQELRRGIRSIT EFGELAGAFNEMA 4877 3858 BACKGROUND
YFNDISNGIRHLA EFKDIAKDINLAS 4878 3859 BACKGROUND
YFNDISNGIRHLA EFKDIAKDINLAS 4878 3859 BACKGROUND
PINTLVRAAGVIG EFYPLGEAINKMA 4879 3860 BACKGROUND
PLENLKNHIISWE EVDELSQAFENLI 4880 3861 BACKGROUND
PLENLKNHIISWE EVDELSQAFENLI 4880 3861 BACKGROUND
PLKHLEDGLSAYQ EFVTLGQQLNAMA 4881 3862 BACKGROUND
PLKHLEDGLSAYQ EFVTLGQQLNAMA 4881 3862 BACKGROUND
SFNSLMRGLKNVH EPGRASASVNELA 4882 3863 BACKGROUND
SFNSLMRGLKNVH EPGRASASVNELA 4882 3863 BACKGROUND
PVKILQESATLLG EVGDLGRAFVSMA 4883 3864 BACKGROUND
PVKILQESATLLG EVGDLGRAFVSMA 4883 3864 BACKGROUND
PLKALEVAARKIA ELIALVTSMRVMQ 4884 3865 BACKGROUND
PLKALEVAARKIA ELIALVTSMRVMQ 4884 3865 BACKGROUND
PLDKLATGFEEIA ELMQIQHSFNVMS 4885 3866 BACKGROUND
PIAQLRKQVERIA EVRDLVDAVNQLA 4886 3867 BACKGROUND
PMRSLERAVAEVA ELRELAERFNVMT 4887 3868 BACKGROUND
PMRSLERAVAEVA ELRELA

PIRDLTKKMEKVS EIVKLADSFNTMA 4972 3950 BACKGROUND
PVSLLEAGAARIA EFGRLAQQFNRMT 4973 3951 BACKGROUND
PVSLLEAGAARIA EFGRLAQQFNRMT 4973 3951 BACKGROUND
PLSQLTDASRQLS ELALLSRSFNDAA 4974 3952 BACKGROUND
PLSQLTDASRQLS ELALLSRSFNDAA 4974 3952 BACKGROUND
PLKDMDSGAKQIA EFGRVAASFNHMT 4975 3953 BACKGROUND
AVKSLSGAAENLA EIGELTKNFNKMT 4976 3954 BACKGROUND
AVKSLSGAAENLA EIGELTKNFNKMT 4976 3954 BACKGROUND
PIKRLTVVTRRLQ EVGELADSFNCMS 4977 3955 BACKGROUND
PIKRLTVVTRRLQ EVGELADSFNCMS 4977 3955 BACKGROUND
PIQAVLAGVNTLA EFADLARAFNAMA 4978 3956 BACKGROUND
PIQAVLAGVNTLA EFADLARAFNAMA 4978 3956 BACKGROUND
TLKNLSHSAQHIA EIGQLASTFNTMT 4979 3957 BACKGROUND
TLKNLSHSAQHIA EIGQLASTFNTMT 4979 3957 BACKGROUND
SVKKLSVTAGKIA EIGQLSQEINAIT 4980 3958 BACKGROUND
SVKKLSVTAGKIA EIGQLSQEINAIT 4980 3958 BACKGROUND
PLSRLVDAARRVG ELGRLADVFNAMS 4981 3959 BACKGROUND
PIFQLISASHKIE EFSFLSEAFNSMS 4982 3960 BACKGROUND
PIFQLISASHKIE EFSFLSEAFNSMS 4982 3960 BACKGROUND
RLDPLVKAADAMA EIGELARAFNQMS 4983 3961 BACKGROUND
RLDPLVKAADAMA EIGELA

EIHKLKEAFSTVA EFAELGDNFNFMV 5078 4046 BACKGROUND
IIYDLIGALDQVE EFERIGHSFNTMI 5079 4047 BACKGROUND
IIYDLIGALDQVE EFERIGHSFNTMI 5079 4047 BACKGROUND
GIRHLQTAVHKVA ELGDITNDLNEMS 5080 4048 BACKGROUND
GIRHLQTAVHKVA ELGDITNDLNEMS 5080 4048 BACKGROUND
PIEQLKTGSLALA EMNDLAQAFNKMA 5081 4049 BACKGROUND
PIEQLKTGSLALA EMNDLAQAFNKMA 5081 4049 BACKGROUND
PVAQLQAGAARIA ELVSLARRWNEMT 5082 4050 BACKGROUND
PLDRLIEKTQRIA ELSQLARALNDMC 5083 4051 BACKGROUND
PLDRLIEKTQRIA ELSQLARALNDMC 5083 4051 BACKGROUND
PLSKMIAGVKRMK EFDELIRHYNEMA 5085 4052 BACKGROUND
PLQQLISGMKKIS EFVFLANTFNNMA 5086 4053 BACKGROUND
PLAKLTGIAALFK QFEDIGEVFNQLA 5087 4054 BACKGROUND
PLAKLTGIAALFK QFEDIGEVFNQLA 5087 4054 BACKGROUND
SIDKLIHALAQLR EFQILGNEYNVML 5088 4055 BACKGROUND
SIDKLIHALAQLR EFQILGNEYNVML 5088 4055 BACKGROUND
SIEHITDVVNKIS EFAQLAEVLNDLC 5089 4056 BACKGROUND
SIEHITDVVNKIS EFAQLAEVLNDLC 5089 4056 BACKGROUND
RLKRMASAIKSAG ELSDLADHFNKMR 5090 4057 BACKGROUND
RLKRMASAIKSAG ELSDLADHFNKMR 5090 4057 BACKGROUND
DIYKIADAFEQVK EFKEIG

QLASLRDASQRLA ELAETARAFNTMA 5178 4140 BACKGROUND
RLTALAGGVAEIG ELADLARAINGML 5179 4141 BACKGROUND
RLTALAGGVAEIG ELADLARAINGML 5179 4141 BACKGROUND
NLTEIERFVTDIG EFIRLSGGINSML 5180 4142 BACKGROUND
NLTEIERFVTDIG EFIRLSGGINSML 5180 4142 BACKGROUND
PIKKLQNATEQII DLGQLSRNFQNMI 5181 4143 BACKGROUND
 DLGQLSRNFQNMI 5181 4143 BACKGROUND
PIKHLTEVSKKIA EIGVLASRYSDTV 5182 4144 BACKGROUND
PITRLTNATKELS EISKLNQGFNQMA 5183 4145 BACKGROUND
PLRQLRRSAQALA EIGELSAAFGAMA 5184 4146 BACKGROUND
PLRQLRRSAQALA EIGELSAAFGAMA 5184 4146 BACKGROUND
ALTQLFDATKKVA EVGSLADNFNIIA 5185 4147 BACKGROUND
ALTQLFDATKKVA EVGSLADNFNIIA 5185 4147 BACKGROUND
AFKALKKGSIIVG EIGETAEAFNAMS 5186 4148 BACKGROUND
PVRQLCEMTKEVA ELQILTNNFEHMA 5187 4149 BACKGROUND
PVRQLCEMTKEVA ELQILTNNFEHMA 5187 4149 BACKGROUND
PTREILDATRLLQ EFTKIADGFNAMA 5188 4150 BACKGROUND
PTREILDATRLLQ EFTKIADGFNAMA 5188 4150 BACKGROUND
PIVAIQEAIGRYG ELGEIARAFNDMA 5189 4151 BACKGROUND
PIVAIQEAIGRYG ELGEIARAFNDMA 5189 4151 BACKGROUND
PLETLAVGVSALG EVVKLTQAFVDMR 5190 

PLQNLQAVILKIG ELVDLAQQFNKML 5274 4232 BACKGROUND
PMDALVRATRELG EFAVLADAFNDMA 5275 4233 BACKGROUND
PMDALVRATRELG EFAVLADAFNDMA 5275 4233 BACKGROUND
RMGRLSAGASVVG EFAEFSCAFDAMT 5276 4234 BACKGROUND
RMGRLSAGASVVG EFAEFSCAFDAMT 5276 4234 BACKGROUND
PLNNLQSITEEMA EFAMLTESLGKMQ 5278 4235 BACKGROUND
PLNNLQSITEEMA EFAMLTESLGKMQ 5278 4235 BACKGROUND
TLEMIMQRVHDIS EFGALASQVNAMS 5279 4236 BACKGROUND
PVNKLSDQAHRLA ELQKLAVAFNGVL 5280 4237 BACKGROUND
PVNKLSDQAHRLA ELQKLAVAFNGVL 5280 4237 BACKGROUND
PLRQLKEGVHAIR EMDELIDVYNQMI 5281 4238 BACKGROUND
PLRQLKEGVHAIR EMDELIDVYNQMI 5281 4238 BACKGROUND
PIRELQGGVQRVH ELQELADEFNAMV 5282 4239 BACKGROUND
PIRELQGGVQRVH ELQELADEFNAMV 5282 4239 BACKGROUND
PIRQISDAMTSFD ELDKIGHAYNEML 5283 4240 BACKGROUND
QVEALIDGFRALR EWERIGTGFNEMS 5284 4241 BACKGROUND
QVEALIDGFRALR EWERIGTGFNEMS 5284 4241 BACKGROUND
SSRGLKRASLALG ELQEAGAAFNTMA 5285 4242 BACKGROUND
PILDLVEGANRMA ELDTLTVAVNHMM 5286 4243 BACKGROUND
PILDLVEGANRMA ELDTLTVAVNHMM 5286 4243 BACKGROUND
KINEVHEGAQKII ELGKLS

FSAQTKVMVHMHA VYGELATGINTMI 5378 4326 BACKGROUND
FNGEMQTMIRLQQ DYGTLAHGVNTVV 5379 4327 BACKGROUND
FNGEMQTMIRLQQ DYGTLAHGVNTVV 5379 4327 BACKGROUND
FVKEVNWMNDTFK VYRQMAQSVNDGM 5380 4328 BACKGROUND
FVKEVNWMNDTFK VYRQMAQSVNDGM 5380 4328 BACKGROUND
PIRLVSLGISDFA DFRRLIQNLNQIV 5381 4329 BACKGROUND
PIRLVSLGISDFA DFRRLIQNLNQIV 5381 4329 BACKGROUND
FSGETTRMIALHA VYGELSKGINTMM 5382 4330 BACKGROUND
FSGETTRMIALHA VYGELSKGINTMM 5382 4330 BACKGROUND
PLAQMAAHVRHMN ELKDLAAAYAEMR 5383 4331 BACKGROUND
PLAQMAAHVRHMN ELKDLAAAYAEMR 5383 4331 BACKGROUND
IIAPVTELAAAVE EIGMLARAFAKRT 5384 4332 BACKGROUND
IIAPVTELAAAVE EIGMLARAFAKRT 5384 4332 BACKGROUND
PLVDLAAAAERLGPLVDLAAAAERLG EADAVAEVLDRSA 5385 4333 BACKGROUND
PLVDLAHVAHEIG EVGIVAEAVNDMA 5386 4334 BACKGROUND
PLVDLAHVAHEIG EVGIVAEAVNDMA 5386 4334 BACKGROUND
PLTALSNTAHEVA EVGVVTRSFNQML 5387 4335 BACKGROUND
PLRRSADLVHQIK EAMLIARDIQEMR 5388 4336 BACKGROUND
PLRRSADLVHQIK EAMLIARDIQEMR 5388 4336 BACKGROUND
NMGLLTDSAEAIS ETVDLGDSINIMV 5389 4337 BACKGROUND
PLSIIEH

RLANLSNSIDAII EIGRLSAKLIEYG 5476 4419 BACKGROUND
RLANLSNSIDAII EIGRLSAKLIEYG 5476 4419 BACKGROUND
PIKELTNVAEKIA EVGNLSRAFHKMQ 5477 4420 BACKGROUND
EPYDASRTALDIA GRGSLMEAIATMQ 5478 4421 BACKGROUND
GLSSALALSSRLA GMAHLLQSNNEVV 5479 4422 BACKGROUND
GLSSALALSSRLA GMAHLLQSNNEVV 5479 4422 BACKGROUND
PLEQAAALATRLA EIGDVVRAMGTMR 5480 4423 BACKGROUND
PLALAVTTAQRVA ESAKLMRALGHMS 5481 4424 BACKGROUND
PLALAVTTAQRVA ESAKLMRALGHMS 5481 4424 BACKGROUND
PLKGAVDAADNIA ETGYLLQAIRKMR 5482 4425 BACKGROUND
PLKGAVDAADNIA ETGYLLQAIRKMR 5482 4425 BACKGROUND
PLTAVTERANAVA EIGELATTFEDMV 5483 4426 BACKGROUND
PLTAVTERANAVA EIGELATTFEDMV 5483 4426 BACKGROUND
PLEHSVAVLDAMA ETAQMLRRMNAMQ 5484 4427 BACKGROUND
PLEHSVAVLDAMA ETAQMLRRMNAMQ 5484 4427 BACKGROUND
PLASASRIADSIA EASMLIRSLAIMQ 5485 4428 BACKGROUND
PLASASRIADSIA EASMLIRSLAIMQ 5485 4428 BACKGROUND
PLHQACDLIMRVA EIGQLLRALSRMQ 5487 4429 BACKGROUND
PLHQACDLIMRVA EIGQLLRALSRMQ 5487 4429 BACKGROUND
PLNDAVDVLTRVA ELGRLLSAVARMR 5488 4430 BACKGROUND
PLNDAVDVLTRVA ELGRLL

QLRELQETAARVA GDGELFAAFRSMV 5586 4513 BACKGROUND
PLTATVSVLEGVT EVGQLGAALNVAI 5587 4514 BACKGROUND
PLTATVSVLEGVT EVGQLGAALNVAI 5587 4514 BACKGROUND
NLRDIAHTAEKIA QAGDLTGAFNVMV 5588 4515 BACKGROUND
AIDNVMPAITKAT VLEDFARQFSAHI 5590 4516 BACKGROUND
AIDNVMPAITKAT VLEDFARQFSAHI 5590 4516 BACKGROUND
PLERAIALFERIG EIGQLARAFDRMI 5591 4517 BACKGROUND
PLERAIALFERIG EIGQLARAFDRMI 5591 4517 BACKGROUND
PIRQLIDYVAQLS ELGKLAMAANTLR 5592 4518 BACKGROUND
PIRQLIDYVAQLS ELGKLAMAANTLR 5592 4518 BACKGROUND
PLYALVHEVEALQ EIGVLARALDGLS 5593 4519 BACKGROUND
PLYALVHEVEALQ EIGVLARALDGLS 5593 4519 BACKGROUND
PLLRLAQASHSIA EVGRLTATFNQMK 5595 4520 BACKGROUND
PLLRLAQASHSIA EVGRLTATFNQMK 5595 4520 BACKGROUND
PIEGLTRAVRAIS ETGELARAFNAMS 5596 4521 BACKGROUND
PIEGLTRAVRAIS ETGELARAFNAMS 5596 4521 BACKGROUND
QVQTLLHSMQQVQ EIGVISQAFNEMC 5597 4522 BACKGROUND
PIEALAAHTRVIA ELGRLADSLDAMS 5598 4523 BACKGROUND
PIEALAAHTRVIA ELGRLADSLDAMS 5598 4523 BACKGROUND
PLVEMASTANRIQ ELGVLARAFNDMT 5599 4524 BACKGROUND
PLVEMASTANRIQ ELGVLA

PLNSMTEVARQMA EVGALADAMKNMT 5689 4605 BACKGROUND
PLGELTAYTGAVA ELGETAVSVHTMV 5690 4606 BACKGROUND
PLGELTAYTGAVA ELGETAVSVHTMV 5690 4606 BACKGROUND
PLLAVERVLGDVA ELGRLLLSARTMR 5691 4607 BACKGROUND
KIAGVVDIAQKIS EVGQLQNAFYTMN 5693 4608 BACKGROUND
KIAGVVDIAQKIS EVGQLQNAFYTMN 5693 4608 BACKGROUND
PIRSCMQVADAIA ETAVLMRAMGNMA 5694 4609 BACKGROUND
PIRSCMQVADAIA ETAVLMRAMGNMA 5694 4609 BACKGROUND
PLENVVRISRDVA EISQLMGAVREMR 5695 4610 BACKGROUND
PLENVVRISRDVA EISQLMGAVREMR 5695 4610 BACKGROUND
PLREATDIANALA ETGMLLRALNQMN 5696 4611 BACKGROUND
PLKDAVALAEAVA EPAQLMDALDKMQ 5697 4612 BACKGROUND
PLKDAVALAEAVA EPAQLMDALDKMQ 5697 4612 BACKGROUND
PLHLAIGAADTLA EIGTLAVAVNTLK 5698 4613 BACKGROUND
PLHLAIGAADTLA EIGTLAVAVNTLK 5698 4613 BACKGROUND
PIGRLTAAVEAIG ETGVLARAFAHMV 5699 4614 BACKGROUND
PIGRLTAAVEAIG ETGVLARAFAHMV 5699 4614 BACKGROUND
PIDEARRFAASIA EAAELMRALVEMQ 5701 4615 BACKGROUND
PIDEARRFAASIA EAAELMRALVEMQ 5701 4615 BACKGROUND
PLRAASRTAEAIA ETGQLMHSMQQMQ 5702 4616 BACKGROUND
PLRAASRTAEAIA ETGQLM

SLNRAVQVAESIA ETAQLMRALSTMQ 5787 4698 BACKGROUND
NIKVLINIFSKMS EFGKLFNEMNKMK 5788 4699 BACKGROUND
NIKVLINIFSKMS EFGKLFNEMNKMK 5788 4699 BACKGROUND
PVNAAVDYFAKIA EMGYLLDSLNDMQ 5789 4700 BACKGROUND
PVNAAVDYFAKIA EMGYLLDSLNDMQ 5789 4700 BACKGROUND
PPAQLVQFANRMA LVGQLTKGLNQLN 5790 4701 BACKGROUND
PPAQLVQFANRMA LVGQLTKGLNQLN 5790 4701 BACKGROUND
SMRGAVTAATEIA EVGELMRSMQRMQ 5791 4702 BACKGROUND
SMRGAVTAATEIA EVGELMRSMQRMQ 5791 4702 BACKGROUND
RLDVISLKAKSIA ELGRLSNSVNEMQ 5792 4703 BACKGROUND
RLDVISLKAKSIA ELGRLSNSVNEMQ 5792 4703 BACKGROUND
GLGRAVAAAESVA EIGTLLTSMNRML 5793 4704 BACKGROUND
GLGRAVAAAESVA EIGTLLTSMNRML 5793 4704 BACKGROUND
PLDAAARVADAIA EVGRLLSAMQRMQ 5794 4705 BACKGROUND
PLDAAARVADAIA EVGRLLSAMQRMQ 5794 4705 BACKGROUND
QFNRISDVLSAVE AVGQLMKQVNAMI 5795 4706 BACKGROUND
PLDTMIHHLEKMA EIGKLTEALKSMQ 5796 4707 BACKGROUND
PLDTMIHHLEKMA EIGKLTEALKSMQ 5796 4707 BACKGROUND
DIATMGAAMDRFA EMGRLHERFNRAV 5797 4708 BACKGROUND
DIATMGAAMDRFA EMGRLHERFNRAV 5797 4708 BACKGROUND
PIDEVLGAARKMA EVGEVV

DIEKIADIFAVAE EIGMLSQSFNKMV 5889 4796 BACKGROUND
DIEKIADIFAVAE EIGMLSQSFNKMV 5889 4796 BACKGROUND
RIGKLVEATQRLA EIGLLGEYTNQTI 5890 4797 BACKGROUND
PIKKMTQYVGQVA EIAQLSSGIENMT 5891 4798 BACKGROUND
PLARILAAVESLA DLGRLAVGINAMV 5892 4799 BACKGROUND
PLARILAAVESLA DLGRLAVGINAMV 5892 4799 BACKGROUND
GIGKLVKLMNKIK EIGKLGINFVDMV 5893 4800 BACKGROUND
PLVSMTQSVEHLA EIGKLIKAFNKMS 5894 4801 BACKGROUND
PLVSMTQSVEHLA EIGKLIKAFNKMS 5894 4801 BACKGROUND
PVKDMGEIAHQAA EIGQLAKALKNMI 5895 4802 BACKGROUND
PVKDMGEIAHQAA EIGQLAKALKNMI 5895 4802 BACKGROUND
PLSYLKDVANQLR EFGEFANIFNDMI 5896 4803 BACKGROUND
PLSYLKDVANQLR EFGEFANIFNDMI 5896 4803 BACKGROUND
PVKELNVLMSRAG EIGALAQSFNQMK 5897 4804 BACKGROUND
PLKNLTRVSSKIA EVQTLSQYFGTMV 5898 4805 BACKGROUND
PLKNLTRVSSKIA EVQTLSQYFGTMV 5898 4805 BACKGROUND
PLVSLTKSIQQFA EIGLLGKAFNDMV 5899 4806 BACKGROUND
LLKQVTAASKEIA EIGQLAKGFNHMS 5900 4807 BACKGROUND
LLKQVTAASKEIA EIGQLAKGFNHMS 5900 4807 BACKGROUND
PLGTVILGLETLT EIGLLARSFEHLR 5901 4808 BACKGROUND
PIDKIKTAVQRAE EIGVLS

PLLLTVKEAQRIA ELSLLVEAFNTMR 5995 4894 BACKGROUND
PLLLTVKEAQRIA ELSLLVEAFNTMR 5995 4894 BACKGROUND
PLHRVQKASADIA ELARLAQSVEQMR 5996 4895 BACKGROUND
PLHRVQKASADIA ELARLAQSVEQMR 5996 4895 BACKGROUND
PLAKLTHVATRIA EVGELARGFQVMV 5997 4896 BACKGROUND
PLAKLTHVATRIA EVGELARGFQVMV 5997 4896 BACKGROUND
NIGQVTKLTSNIC ELGHMAQDLNKMS 5998 4897 BACKGROUND
PLLKITTALKNIA EFAEVGRVINEMT 5999 4898 BACKGROUND
PLLKITTALKNIA EFAEVGRVINEMT 5999 4898 BACKGROUND
PVNELLRHTARLA ELGELMRAFQLMT 6000 4899 BACKGROUND
PLQQLQKRFQQVA EMGNLARSFAAMV 6001 4900 BACKGROUND
PLQQLQKRFQQVA EMGNLARSFAAMV 6001 4900 BACKGROUND
PVRQLSVLAEGFG EFGELARHFNQAT 6002 4901 BACKGROUND
PVRQLSVLAEGFG EFGELARHFNQAT 6002 4901 BACKGROUND
PIVEITESSRKLS EIGVLAQSLNSSV 6003 4902 BACKGROUND
PIVEITESSRKLS EIGVLAQSLNSSV 6003 4902 BACKGROUND
HLAGLTQASEAVA EVGRLGAAFNAMS 6004 4903 BACKGROUND
PLAACAKAALEIA EIGQLQEAFNDML 6005 4904 BACKGROUND
RLRMLTHAFEQAG ELGDLSHHFNQMN 6006 4905 BACKGROUND
RLRMLTHAFEQAG ELGDLSHHFNQMN 6006 4905 BACKGROUND
PMRKLVGSFRHVE EFGYLY

NLALLKSASQRIL EIGKTMLAFNQMV 6098 4990 BACKGROUND
PLMSMSDHFMKLA EIGEVFEQLKLMQ 6099 4991 BACKGROUND
PLMSMSDHFMKLA EIGEVFEQLKLMQ 6099 4991 BACKGROUND
IVNQIKRVVERLA ELCALGADLDRVI 6100 4992 BACKGROUND
RMDELVTLARQVR EIGILSQAFSDVV 6101 4993 BACKGROUND
RMDELVTLARQVR EIGILSQAFSDVV 6101 4993 BACKGROUND
RLKRIGEVANAIS FIGDMANSFNLMS 6102 4994 BACKGROUND
RLKRIGEVANAIS FIGDMANSFNLMS 6102 4994 BACKGROUND
RVGRLKASAVEIS EIDDLTEAIRTMQ 6103 4995 BACKGROUND
RVGRLKASAVEIS EIDDLTEAIRTMQ 6103 4995 BACKGROUND
PVRRMLVNFEHLA ELGRMQKAFARML 6104 4996 BACKGROUND
PVRRMLVNFEHLA ELGRMQKAFARML 6104 4996 BACKGROUND
PLVALADTAQRIS EVTNLAQALESMQ 6105 4997 BACKGROUND
PLVALADTAQRIS EVTNLAQALESMQ 6105 4997 BACKGROUND
PVQYWVSRIQAIA EIGILGNNIQQMQ 6106 4998 BACKGROUND
PFTQLVEAMKHND EIGALFSGYNEMI 6107 4999 BACKGROUND
PFTQLVEAMKHND EIGALFSGYNEMI 6107 4999 BACKGROUND
SFEEMSNAAFRIS SLGEVGHAFNSMV 6108 5000 BACKGROUND
SFEEMSNAAFRIS SLGEVGHAFNSMV 6108 5000 BACKGROUND
YIRKIMVITSDIA ELGTLAKNVNNMK 6109 5001 BACKGROUND
PINAIVKDAARVA EIDNLQ

PLRSLTAAAEQVA DIDRLAQAFRHMQ 6203 5084 BACKGROUND
PIMDIAEISQQLD ETTQLISAFKSMS 6204 5085 BACKGROUND
PIMDIAEISQQLD ETTQLISAFKSMS 6204 5085 BACKGROUND
PITGIAASVSRVD EIVILASGFNQMV 6205 5086 BACKGROUND
SIENVAAKMNLFD ELRIIESSFNRML 6206 5087 BACKGROUND
SIENVAAKMNLFD ELRIIESSFNRML 6206 5087 BACKGROUND
PLRQSLDAAMAIA ELLELNLALGQMV 6207 5088 BACKGROUND
PLRQSLDAAMAIA ELLELNLALGQMV 6207 5088 BACKGROUND
PLASVTKSIEDLT ETELITDAFNKML 6208 5089 BACKGROUND
PLASVTKSIEDLT ETELITDAFNKML 6208 5089 BACKGROUND
PLNTLMQLTDNIA EVANLAHSIELME 6209 5090 BACKGROUND
PLNTLMQLTDNIA EVANLAHSIELME 6209 5090 BACKGROUND
HVAVISETLDQVT DLARIGAKVDQMA 6210 5091 BACKGROUND
HVAVISETLDQVT DLARIGAKVDQMA 6210 5091 BACKGROUND
PLRRVAQFASELT EIDLVVEGFSVLQ 6211 5092 BACKGROUND
RLTLGAQAARKIA EVAAFASAVDTAV 6212 5093 BACKGROUND
RLTLGAQAARKIA EVAAFASAVDTAV 6212 5093 BACKGROUND
IIDQINQVMRKVN RLTELKTFINQTV 6213 5094 BACKGROUND
IIDQINQVMRKVN RLTELKTFINQTV 6213 5094 BACKGROUND
NLKRLKRGTQRLG ELGDLAIAFNTMS 6214 5095 BACKGROUND
NLKRLKRGTQRLG ELGDLA

SIESATNVATELS RSDKLGHAMNSLA 6312 5180 BACKGROUND
NIDIITDGLSTLA EMGQISQSVNNLA 6313 5181 BACKGROUND
PLEWAARKAEAMS EVAALVKALNGML 6314 5182 BACKGROUND
PIEALAKEVRELE ELTSLVRNLNRLL 6315 5183 BACKGROUND
HLNKISQYAHTLT ELDTVVTALNEMQ 6316 5184 BACKGROUND
HLNKISQYAHTLT ELDTVVTALNEMQ 6316 5184 BACKGROUND
PLQQLSEQMQNQA ELQVLIDSFGAMR 6318 5185 BACKGROUND
PIKYLYSTIEKQK EIGVTIKEFFNLQ 6319 5186 BACKGROUND
PIKYLYSTIEKQK EIGVTIKEFFNLQ 6319 5186 BACKGROUND
YLNRIMKDTKDMA PLASHAENLNHLR 6320 5187 BACKGROUND
YLNRIMKDTKDMA PLASHAENLNHLR 6320 5187 BACKGROUND
PLKALTRRVGDFA DIQGLIDAHNAME 6321 5188 BACKGROUND
KLESLMFAAKNVI IFARLAHTFNNMR 6322 5189 BACKGROUND
KLESLMFAAKNVI IFARLAHTFNNMR 6322 5189 BACKGROUND
RIERLAQRAEQVG ALGRLARQLTHSH 6323 5190 BACKGROUND
RIERLAQRAEQVG ALGRLARQLTHSH 6323 5190 BACKGROUND
QIAEITDALIDVK LVAPLAYEINEIV 6324 5191 BACKGROUND
RFESLAEHAQLVT EFNALADSLQQMS 6325 5192 BACKGROUND
NIHRLVAYARSLS DLTQLAQAMDEMR 6326 5193 BACKGROUND
NIHRLVAYARSLS DLTQLAQAMDEMR 6326 5193 BACKGROUND
PIVLIAKKVSESR EIRHLI

PIQRLAKDVSKIE EVHSLSRTIQAML 6415 5278 BACKGROUND
PIQRLAKDVSKIE EVHSLSRTIQAML 6415 5278 BACKGROUND
SIEALRHVVHLQR DVRGLAADFNHLT 6416 5279 BACKGROUND
SIEALRHVVHLQR DVRGLAADFNHLT 6416 5279 BACKGROUND
SLETLAEAARRLS EVRELATATAAMA 6417 5280 BACKGROUND
SLETLAEAARRLS EVRELATATAAMA 6417 5280 BACKGROUND
PLRRLAAIAAHVA EVGIVGHALNRLL 6418 5281 BACKGROUND
PLRRLAAIAAHVA EVGIVGHALNRLL 6418 5281 BACKGROUND
PINRLVRRAEEYR EFRQLSGALNRML 6419 5282 BACKGROUND
PINRLVRRAEEYR EFRQLSGALNRML 6419 5282 BACKGROUND
PLNMVGEALQGVL EVRPLLRYIDKLM 6420 5283 BACKGROUND
PLNMVGEALQGVL EVRPLLRYIDKLM 6420 5283 BACKGROUND
KLKLFSQVSEKVA EYDQALDAMEHMR 6422 5284 BACKGROUND
KLKLFSQVSEKVA EYDQALDAMEHMR 6422 5284 BACKGROUND
RLQLLVTAVRRYR EVDLLGTAISDMS 6423 5285 BACKGROUND
RLQLLVTAVRRYR EVDLLGTAISDMS 6423 5285 BACKGROUND
PLKQLADLVNGVA EIGILAQTLETAM 6424 5286 BACKGROUND
PLKQLADLVNGVA EIGILAQTLETAM 6424 5286 BACKGROUND
PIKRLTRYANKIR ELRDMGLALEDMR 6426 5287 BACKGROUND
PIKRLTRYANKIR ELRDMGLALEDMR 6426 5287 BACKGROUND
NMKKVYQITKNFS VLYGLY

5314

In [20]:
# Persist data_df as a pickle file named 'af2_newrun.p' inside data_dir.
data_df.to_pickle(
    path=os.path.join(data_dir, 'af2_newrun.p'),
)

In [21]:
# checked, OK