In [13]:
import sqlite3
import pandas as pd
import os
import sys
from Bio.SubsMat import MatrixInfo as matlist
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
import math
import numpy as np
def fasta2List(pathFasta):
    """Parse a FASTA file into a ``{header: sequence}`` dictionary.

    Parameters
    ----------
    pathFasta : str or path-like
        Path of the FASTA file to read.

    Returns
    -------
    dict
        Maps every header line (leading ">" kept, trailing newline removed)
        to its sequence with the line breaks removed. When the same header
        occurs several times only the last record is kept. Text located
        before the first header is ignored; a file without any header yields
        an empty dictionary.
    """
    title = []
    seq = []
    seq_temp = []
    # The context manager closes the handle even when parsing raises; the
    # handle used to be opened and never closed.
    with open(pathFasta, "r") as f:
        for line in f:
            if line[0] == ">":
                # A new header terminates the record accumulated so far.
                seq.append(''.join(seq_temp).replace("\n", ""))
                title.append(line.replace("\n", ""))
                seq_temp = []
            else:
                seq_temp.append(line)
    seq.append(''.join(seq_temp).replace("\n", ""))
    # The first entry holds whatever preceded the first header (normally
    # nothing), so it does not belong to any record.
    seq.pop(0)
    return dict(zip(title, seq))

# Open the SQLite database that stores the mismatch tables (the path is
# resolved against the current working directory).
db_path = '../../mismatch_db.db'
conn = sqlite3.connect(db_path)

In [3]:
# Full mismatch table; the four exon boundary columns are stored with the
# nullable integer dtype.
exon_columns = ("exon_start_prim", "exon_stop_prim", "exon_start_hum", "exon_stop_hum")
mismatch = pd.read_sql_query("SELECT * FROM mismatch", conn).astype(
    {column: "Int64" for column in exon_columns}
)

# Protein sequence of the human protein of every mismatch.
human_protein_sql = """
SELECT mismatch.mismatch_ID, mismatch.prot_hum, protein.sequence
FROM mismatch
JOIN protein ON mismatch.prot_hum = protein.prot_ID"""
prot_seq_hum = pd.read_sql_query(sql=human_protein_sql, con=conn)

# Protein sequence of the primate protein of every mismatch.
primate_protein_sql = """
SELECT mismatch.mismatch_ID, mismatch.prot_prim, protein.sequence
FROM mismatch
JOIN protein ON mismatch.prot_prim = protein.prot_ID"""
prot_seq_prim = pd.read_sql_query(sql=primate_protein_sql, con=conn)

# Exon / intron elements (type, number, sequence) of the transcript attached
# to the primate protein of every mismatch.
primate_elements_sql = """
SELECT mismatch_ID, mismatch.prot_prim, exon_intron_map.'type', exon_intron_map.number_elem, exon_intron_map.seq
FROM mismatch
JOIN protein ON mismatch.prot_prim = protein.prot_ID
JOIN transcript ON protein.transcript_ID = transcript.transcript_ID
JOIN exon_intron_map ON transcript.transcript_ID = exon_intron_map.transcript_ID
"""
prim_exon_introns = pd.read_sql_query(sql=primate_elements_sql, con=conn)

# Same element table for the human protein of every mismatch.
human_elements_sql = """
SELECT mismatch_ID, mismatch.prot_hum, exon_intron_map.'type', exon_intron_map.number_elem, exon_intron_map.seq
FROM mismatch
JOIN protein ON mismatch.prot_hum = protein.prot_ID
JOIN transcript ON protein.transcript_ID = transcript.transcript_ID
JOIN exon_intron_map ON transcript.transcript_ID = exon_intron_map.transcript_ID
"""
human_exon_introns = pd.read_sql_query(sql=human_elements_sql, con=conn)

In [39]:
# Flag the conserved mismatches.
# The primate segment of every mismatch is locally aligned against each
# sequence of the FASTA file named after the human protein; the mismatch is
# flagged when at least MIN_CONSERVED_HITS of those sequences reach the score
# threshold.
MIN_SCORE_PER_RESIDUE = 1.8  # a match scores 2 per residue, so this is 90 % of the maximum
MIN_CONSERVED_HITS = 4

counter = 0
total = 0
index_conserved = []
fasta_cache = {}  # prot_hum -> parsed FASTA, so that each file is read only once
for index, row in mismatch.iterrows():
    total += 1
    conserved = 0

    # Columns are addressed by label: integer keys on a label-indexed Series
    # are deprecated positional look-ups in recent pandas releases.
    prot_hum = row["prot_hum"]
    if prot_hum not in fasta_cache:
        fasta_cache[prot_hum] = fasta2List("../../data/raw/uniprot-sequence/" + prot_hum + ".id.fasta")
    mySeq = fasta_cache[prot_hum]
    mismtaching_seq = row["seq_prim"]

    for i, j in mySeq.items():
        # localms arguments: match = 2, mismatch = 0, gap opening = -0.5,
        # gap extension = -0.1; only the best score is returned.
        myAlign = pairwise2.align.localms(j, mismtaching_seq, 2, 0, -.5, -.1, one_alignment_only=True, score_only=True)
        if myAlign / len(mismtaching_seq) > MIN_SCORE_PER_RESIDUE:
            conserved += 1

    if conserved >= MIN_CONSERVED_HITS:
        index_conserved.append(index)
        counter += 1
print(counter)
print(total)
print(index_conserved)

728
19012
[45, 46, 87, 90, 91, 92, 112, 113, 115, 116, 117, 118, 256, 258, 260, 261, 263, 264, 265, 269, 270, 272, 273, 276, 278, 280, 281, 283, 284, 285, 289, 290, 292, 293, 296, 298, 300, 301, 303, 304, 307, 308, 309, 313, 314, 317, 318, 400, 402, 404, 405, 407, 408, 411, 412, 413, 417, 418, 421, 422, 427, 429, 431, 432, 434, 435, 436, 440, 441, 443, 444, 462, 463, 464, 465, 636, 638, 639, 640, 719, 722, 727, 728, 794, 797, 798, 799, 1635, 2038, 2040, 2114, 2149, 2152, 2153, 2311, 2312, 2410, 2411, 2412, 2413, 2414, 2415, 2487, 2488, 2492, 2494, 2500, 2509, 2556, 2557, 2559, 2561, 2592, 2594, 2597, 2598, 2599, 2600, 2601, 2722, 2723, 2725, 2727, 3023, 3024, 3025, 3026, 3027, 3028, 3030, 3031, 3032, 3033, 3034, 3035, 3067, 3161, 3164, 3169, 3171, 3468, 3469, 3470, 3471, 3472, 3556, 3557, 3558, 3560, 3561, 3584, 3601, 3603, 3605, 3606, 3607, 3866, 3867, 3868, 3870, 3871, 3872, 3942, 3943, 3946, 3947, 3949, 3950, 3951, 3954, 3955, 3967, 4222, 4223, 4224, 4226, 4229, 4384, 4385, 4386, 43

In [38]:
# Flag the alignment errors: a mismatch is flagged when its primate segment
# is found almost unchanged somewhere in the full human protein sequence.
MIN_SCORE_PER_RESIDUE = 1.8  # a match scores 2 per residue, so this is 90 % of the maximum

counter = 0
total = 0
index_align_error = []

# First sequence returned for each human protein (third column of the query
# result), built once instead of scanning the whole frame for every mismatch.
first_hits = prot_seq_hum.drop_duplicates(subset="prot_hum")
human_sequences = dict(zip(first_hits["prot_hum"], first_hits.iloc[:, 2]))

for index, row in mismatch.iterrows():
    # Columns are addressed by label: integer keys on a label-indexed Series
    # are deprecated positional look-ups in recent pandas releases.
    prot_hum = row["prot_hum"]
    # Mismatches whose human protein has no stored sequence are skipped. The
    # explicit test replaces a bare ``except`` that also swallowed
    # KeyboardInterrupt and any unrelated error.
    if prot_hum not in human_sequences:
        continue
    human_seq = human_sequences[prot_hum]
    total += 1
    mismtaching_seq = row["seq_prim"]
    # localms arguments: match = 2, mismatch = 0, gap opening = -0.5,
    # gap extension = -0.1; only the best score is returned.
    myAlign = pairwise2.align.localms(human_seq, mismtaching_seq, 2, 0, -.5, -.1, one_alignment_only=True, score_only=True)
    if myAlign / len(mismtaching_seq) > MIN_SCORE_PER_RESIDUE:
        index_align_error.append(index)
        counter += 1

print(counter)
print(total)
print(index_align_error)


160
18714
[45, 46, 462, 463, 464, 465, 1690, 2038, 2040, 2129, 2149, 2151, 2152, 2153, 2293, 2311, 2312, 3161, 3164, 3556, 3557, 3558, 3560, 3561, 3584, 3906, 3967, 4478, 4840, 5330, 5331, 5332, 5371, 5807, 6599, 6700, 6705, 6706, 6874, 7317, 7907, 8007, 8012, 8013, 8014, 8019, 8023, 8033, 8254, 8381, 8382, 8385, 8387, 8528, 8551, 8553, 8555, 8556, 8567, 8837, 8839, 8841, 8844, 8846, 8847, 9109, 9359, 9626, 9628, 10042, 10499, 10574, 11311, 11314, 11807, 12013, 12014, 12015, 12016, 12017, 12059, 12074, 12428, 12450, 12452, 12453, 12454, 12633, 12655, 12656, 12659, 12660, 12662, 12663, 12664, 12665, 12667, 12669, 12670, 12671, 12672, 12673, 13697, 13701, 13702, 13703, 13704, 13706, 13708, 13801, 13913, 13914, 13934, 13935, 13937, 13938, 13939, 14369, 14559, 14772, 15116, 15117, 15118, 15119, 15120, 15121, 15122, 15412, 15713, 15715, 16021, 16022, 16096, 16097, 16169, 16375, 17009, 17010, 17011, 17012, 17100, 17101, 17107, 17431, 17838, 18310, 18311, 18318, 18319, 18493, 18565, 18566, 18

In [37]:
# Flag the sequences with protein repeats: a mismatch is flagged when its
# human segment is found almost unchanged in the human protein once the
# segment itself has been cut out of that protein.
MIN_SCORE_PER_RESIDUE = 1.8  # a match scores 2 per residue, so this is 90 % of the maximum

counter = 0
total = 0
index_repeats = []

# First sequence returned for each human protein (third column of the query
# result), built once instead of scanning the whole frame for every mismatch.
first_hits = prot_seq_hum.drop_duplicates(subset="prot_hum")
human_sequences = dict(zip(first_hits["prot_hum"], first_hits.iloc[:, 2]))

for index, row in mismatch.iterrows():
    # Columns are addressed by label: integer keys on a label-indexed Series
    # are deprecated positional look-ups in recent pandas releases.
    prot_hum = row["prot_hum"]
    # Mismatches whose human protein has no stored sequence are skipped. The
    # explicit test replaces a bare ``except`` that also swallowed
    # KeyboardInterrupt and any unrelated error.
    if prot_hum not in human_sequences:
        continue
    full_human_protein = human_sequences[prot_hum]

    total += 1
    human_seq = row["seq_hum"]
    # NOTE(review): the displayed rows suggest 0-based inclusive coordinates
    # (a segment with pos_start_hum == 0 begins with the initial "M" and
    # len(seq_hum) == pos_stop_hum - pos_start_hum + 1). If so, these slices
    # keep the first residue of the segment and drop the residue that follows
    # it - confirm the intended offsets before changing them.
    begin_seq = full_human_protein[:row["pos_start_hum"] + 1]
    end_seq = full_human_protein[row["pos_stop_hum"] + 2:]
    human_prot_Mismatch = begin_seq + end_seq
    # localms arguments: match = 2, mismatch = 0, gap opening = -0.5,
    # gap extension = -0.1; only the best score is returned.
    myAlign = pairwise2.align.localms(human_prot_Mismatch, human_seq, 2, 0, -.5, -.1, one_alignment_only=True, score_only=True)
    if myAlign / len(human_seq) > MIN_SCORE_PER_RESIDUE:
        index_repeats.append(index)
        counter += 1

print(counter)
print(total)
print(index_repeats)

257
18714
[34, 35, 36, 37, 38, 39, 42, 43, 609, 884, 1000, 1001, 1002, 1385, 1391, 1614, 1618, 1619, 1620, 1621, 1622, 1623, 1625, 1626, 1627, 1631, 1639, 1746, 1755, 1757, 1758, 1759, 1761, 1847, 1848, 1849, 1872, 2061, 2065, 2256, 2301, 2346, 2359, 2365, 2367, 2368, 2370, 2372, 2373, 2424, 2564, 2565, 2567, 2568, 2573, 3160, 3552, 3878, 3879, 3880, 3881, 3882, 3883, 3885, 3886, 3887, 3888, 3889, 3890, 3891, 3893, 3896, 3897, 3898, 3900, 3901, 3903, 3906, 6150, 6586, 6592, 6699, 6700, 6702, 6703, 6704, 6705, 6706, 6924, 7113, 7332, 7539, 7542, 7882, 7884, 7885, 7886, 7904, 7907, 8021, 8023, 8105, 8106, 8107, 8108, 8109, 8110, 8111, 8112, 8113, 8254, 8386, 8391, 8392, 8393, 8394, 8395, 8398, 8400, 8401, 8403, 8404, 8405, 8406, 8407, 8410, 8412, 8414, 8415, 8416, 8417, 8418, 8419, 8551, 8558, 8560, 8561, 8562, 8563, 8565, 8566, 8569, 8837, 8839, 8841, 8844, 8846, 8847, 10488, 10491, 10495, 11256, 11675, 11676, 11677, 11678, 11679, 11680, 11681, 11682, 11683, 11684, 11689, 11690, 11691, 

In [35]:
# Flag the mismatches whose primate genomic sequence contains an "N".
# For every exon number covered by the mismatch, the exon sequence and the
# sequence of the intron carrying the same number are concatenated, and the
# mismatch is flagged when the result contains the letter "N".

counter = 0
total = 0
index_genom_n = []

# Elements grouped once by mismatch instead of filtering the whole frame for
# every row. NOTE(review): the groups are looked up with the DataFrame index,
# which is assumed to be equal to mismatch_ID - confirm.
elements_by_mismatch = {key: group for key, group in prim_exon_introns.groupby("mismatch_ID")}
no_elements = prim_exon_introns.iloc[0:0]

for index, row in mismatch.iterrows():
    # Columns are addressed by label: integer keys on a label-indexed Series
    # are deprecated positional look-ups in recent pandas releases.
    exon_start = row["exon_start_prim"]
    exon_stop = row["exon_stop_prim"]
    # Rows without integer exon boundaries cannot be mapped onto exons.
    if not isinstance(exon_start, int) or not isinstance(exon_stop, int):
        continue
    subset_exon_intron = elements_by_mismatch.get(index, no_elements)
    my_CDS = []
    try:
        for n in range(exon_start, exon_stop + 1):
            same_number = subset_exon_intron["number_elem"] == n
            my_exon = subset_exon_intron.loc[same_number & (subset_exon_intron["type"] == "Exon")]
            my_intron = subset_exon_intron.loc[same_number & (subset_exon_intron["type"] == "Intron")]
            # Without an exon nothing is added for this number, not even the
            # intron; an exon without a matching intron is added alone.
            if my_exon.empty:
                continue
            my_CDS.append(my_exon.iloc[0, 4])
            if not my_intron.empty:
                my_CDS.append(my_intron.iloc[0, 4])
        if "N" in ''.join(my_CDS):
            index_genom_n.append(index)
            counter += 1
        total += 1
    except TypeError:
        # A non-string sequence (for instance a NULL read from the database)
        # makes the join fail: such rows are neither flagged nor counted.
        pass

print(counter)
print(total)

4723
17470


In [34]:
# Flag the mismatches located in a single human exon while the primate
# segment spans several exons (exon_stop_prim - exon_start_prim >= 2).
counter = 0
total = 0
index_multiple_exon = []
for index, row in mismatch.iterrows():
    # Columns are addressed by label: integer keys on a label-indexed Series
    # are deprecated positional look-ups in recent pandas releases.
    exon_start_prim = row["exon_start_prim"]
    exon_start_hum = row["exon_start_hum"]
    # Rows whose start exons are not integers are left out of the count.
    if not isinstance(exon_start_prim, int) or not isinstance(exon_start_hum, int):
        continue
    total += 1
    if (int(exon_start_hum) - int(row["exon_stop_hum"]) == 0) and (row["exon_stop_prim"] - exon_start_prim >= 2):
        index_multiple_exon.append(index)
        counter += 1

print(counter)
print(total)
print(index_multiple_exon)

470
16061
[54, 110, 137, 175, 232, 238, 323, 327, 353, 461, 534, 665, 680, 687, 731, 773, 774, 782, 783, 823, 937, 956, 975, 1022, 1026, 1029, 1112, 1117, 1118, 1143, 1149, 1251, 1294, 1320, 1329, 1352, 1353, 1393, 1419, 1420, 1422, 1428, 1488, 1494, 1548, 1554, 1621, 1768, 1813, 1854, 1973, 2028, 2035, 2042, 2149, 2422, 2433, 2439, 2536, 2722, 2723, 2725, 2726, 2727, 2747, 2756, 2768, 2775, 2800, 2815, 2821, 2930, 2979, 3053, 3079, 3126, 3130, 3163, 3193, 3201, 3220, 3309, 3326, 3344, 3360, 3466, 3476, 3493, 3644, 3645, 3665, 3738, 3763, 3775, 3885, 3937, 3998, 4003, 4023, 4085, 4112, 4236, 4243, 4259, 4265, 4285, 4315, 4360, 4428, 4534, 4605, 4691, 4787, 4902, 4918, 4960, 5080, 5094, 5144, 5176, 5177, 5178, 5214, 5254, 5256, 5344, 5394, 5400, 5406, 5416, 5431, 5447, 5461, 5589, 5598, 5599, 5650, 5653, 5659, 5660, 5666, 5672, 5673, 5688, 5708, 5892, 5909, 5947, 6185, 6260, 6290, 6382, 6468, 6475, 6485, 6496, 6498, 6556, 6737, 6775, 6849, 6856, 6951, 7031, 7083, 7111, 7112, 7204, 7209,

In [124]:
# Statistics on the human intron splice sites.
# An intron is canonical when it starts with "GT" and ends with "AG"; every
# other intron, GC/AG and AT/AC included, is counted as non canonical.
# NOTE(review): human_exon_introns holds one row per (mismatch, element), so
# an intron shared by several mismatches is presumably counted once per
# mismatch - confirm that this weighting is intended.

canonical = 0
non_canonical = 0
human_intron_seq = human_exon_introns.loc[human_exon_introns["type"] == "Intron"]
# The intron sequence is the fifth column of the query result. Iterating over
# that column directly avoids integer keys on a label-indexed row, which are
# deprecated positional look-ups in recent pandas releases.
for intron_seq in human_intron_seq.iloc[:, 4]:
    if intron_seq[:2] == "GT" and intron_seq[-2:] == "AG":
        canonical += 1
    else:
        non_canonical += 1
print("Canonical (GT/AG) " + str(canonical/(canonical+non_canonical)*100))
print("Non canonical " + str(non_canonical/(canonical+non_canonical)*100))

Canonical (GT/AG) 99.06727804188617
Non canonical 0.9327219581138378


In [8]:
# Display the mismatch table.
mismatch

Unnamed: 0,mismatch_ID,prot_hum,prot_prim,pos_start_prim,pos_stop_prim,pos_start_hum,pos_stop_hum,exon_start_prim,exon_stop_prim,exon_start_hum,exon_stop_hum,seq_prim,seq_hum
0,0,A0A044PY82,H0WKS4,1,55,0,50,1,3,1,2,AGNPAQPSPGRESLARSKAAPGAPSGLSCAQQGCRGRRPALPLGGR...,MAPQRAGPGLGKPSSKKSRALRGGPGVPRARQRSPRAAAELNTEVP...
1,1,A0A044PY82,H0WKS4,108,119,110,122,3,3,2,2,KISSGSSLETPQ,PKTSSSRSGSEAP
2,2,A0A044PY82,G3QIT3,92,111,91,119,2,3,2,2,GPSDRSAKGGGPGAASPSSP,PGPSEAQRAEVQGQHPPPGPKTSSSRSGS
3,3,A0A075B6I0,A0A0D9RMQ9,74,97,78,117,2,2,2,2,CPSGVPESLLWLHPWEQSCPHHHG,TRSSGVPDRFSGSILGNKAALTITGAQADDESDYYCVLYM
4,4,A0A075B6I3,A0A096NSB3,36,51,35,50,2,2,2,2,ASLTCTFSGGINVAGY,TARLPCTLSSDLSVGG
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19007,19007,Q6UXU4,F7ILG0,4,37,0,30,2,2,1,1,ELAKAFSSQRTRLSAILSMLSLSFSTISLFSNYW,MKTSRRGRALLAVALNLLALLFATTAFLTTH
19008,19008,Q6UXU4,F7ILG0,44,77,37,83,2,2,1,1,VPKPLCEKDLAAKCFDMPVSLDGDVANTSTQEVV,RVPKPGCGQGGRANCPNSGANATANGTAAPAAAAAAATASGNGPPGG
19009,19009,Q6UXU4,F7ILG0,115,245,121,273,3,5,2,5,FIELTPPTERGEKGLLEFATLQGPCHPTLRFGGKQLMEKASLPYPP...,SFIDLAPASEKGVLWLSVVSEVLYILLLVVGFSLMCLELFHSSNVI...
19010,19010,Q6UXU4,F7ILG0,247,286,276,327,5,6,5,7,LELRLGLLHGLAFLHLLHGVGGHHLQYVHQDGAGVQVQA,RMEKRDGSEEDFHLDCRHERYPARHQPHMADSWPRSSAQEAPELNR...


In [44]:
%%time
# Flag the mismatches with a non-canonical human splice site.
# The introns numbered from exon_start_hum - 1 to exon_stop_hum are checked;
# the mismatch is flagged as soon as one of them does not start with "GT",
# does not end with "AG" or is not longer than 30 nucleotides.
counter = 0
total = 0
index_non_canon = []

# Human introns grouped once by mismatch instead of filtering the whole frame
# for every row.
human_introns = human_exon_introns.loc[human_exon_introns['type'] == "Intron"]
introns_by_mismatch = {key: group for key, group in human_introns.groupby("mismatch_ID")}

for index, row in mismatch.iterrows():
    # Columns are addressed by label: integer keys on a label-indexed Series
    # are deprecated positional look-ups in recent pandas releases.
    subset_intron = introns_by_mismatch.get(row["mismatch_ID"])
    exon_start = row["exon_start_hum"]
    exon_stop = row["exon_stop_hum"]
    # Missing boundaries are skipped explicitly: with the nullable integer
    # dtype they are pd.NA, whose comparison with "ERROR" cannot be used as
    # a condition and which int() cannot convert.
    if subset_intron is None or pd.isna(exon_start) or pd.isna(exon_stop) or exon_start == "ERROR":
        continue
    total += 1
    for i in range(int(exon_start), int(exon_stop) + 2):
        intron = subset_intron.loc[subset_intron["number_elem"] == (i - 1)]
        if intron.empty:
            continue
        intron_seq = intron.iloc[0, 4]
        if intron_seq[:2] == "GT" and intron_seq[-2:] == "AG" and len(intron_seq) > 30:
            continue
        index_non_canon.append(index)
        counter += 1
        break

print(counter)
print(total)
print(index_non_canon)

414
16982
[63, 67, 97, 102, 105, 124, 125, 161, 162, 163, 164, 165, 205, 223, 322, 324, 377, 389, 818, 820, 821, 846, 892, 893, 1107, 1108, 1114, 1116, 1194, 1195, 1196, 1231, 1260, 1402, 1766, 1773, 1779, 2033, 2034, 2039, 2041, 2052, 2053, 2054, 2058, 2059, 2060, 2160, 2297, 2298, 2301, 2401, 2454, 2455, 2459, 2460, 2461, 2462, 2463, 2464, 2465, 2466, 2467, 2468, 2555, 2556, 2557, 2559, 2561, 2637, 2715, 2788, 2903, 2904, 2943, 2946, 3006, 3009, 3064, 3084, 3187, 3206, 3308, 3415, 3421, 3422, 3423, 3424, 3425, 3426, 3428, 3488, 3542, 3627, 3628, 3704, 3728, 3833, 4006, 4280, 4281, 4374, 4444, 4519, 4525, 4526, 4597, 4623, 4681, 4682, 4684, 4696, 4699, 4703, 4707, 4724, 4727, 4728, 4729, 4754, 4797, 4828, 4876, 4878, 4887, 4888, 5004, 5076, 5088, 5199, 5208, 5394, 5397, 5398, 5434, 5456, 5461, 5462, 5533, 5549, 5633, 5635, 5766, 5801, 5804, 5813, 5814, 5815, 5816, 5818, 5819, 5830, 5831, 5832, 5898, 5899, 5925, 6401, 6503, 6557, 6567, 6613, 6695, 6945, 6946, 7101, 7103, 7104, 7106, 71

In [11]:
# Flag the mismatches with too small primate introns.
# The introns numbered from exon_start_prim to exon_stop_prim - 1 are
# checked; the mismatch is flagged as soon as one of them is at most 29
# nucleotides long.
MAX_SMALL_INTRON_LENGTH = 29

counter = 0
total = 0
index_intron_small = []

# Primate introns grouped once by mismatch instead of filtering the whole
# frame for every row.
primate_introns = prim_exon_introns.loc[prim_exon_introns['type'] == "Intron"]
introns_by_mismatch = {key: group for key, group in primate_introns.groupby("mismatch_ID")}

for index, row in mismatch.iterrows():
    # Columns are addressed by label: integer keys on a label-indexed Series
    # are deprecated positional look-ups in recent pandas releases.
    subset_intron = introns_by_mismatch.get(row["mismatch_ID"])
    exon_start = row["exon_start_prim"]
    exon_stop = row["exon_stop_prim"]
    # Missing boundaries are skipped explicitly: with the nullable integer
    # dtype they are pd.NA, whose comparison with "ERROR" cannot be used as
    # a condition and which int() cannot convert.
    if subset_intron is None or pd.isna(exon_start) or pd.isna(exon_stop) or exon_start == "ERROR":
        continue
    total += 1
    for i in range(int(exon_start), int(exon_stop)):
        intron = subset_intron.loc[subset_intron["number_elem"] == i]
        # An intron number absent from the map is ignored instead of raising
        # an IndexError on the positional access below.
        if intron.empty:
            continue
        if len(intron.iloc[0, 4]) <= MAX_SMALL_INTRON_LENGTH:
            index_intron_small.append(index)
            counter += 1
            break

print(counter)
print(total)
print(index_intron_small)

903
16936
[21, 52, 67, 84, 89, 114, 126, 155, 254, 257, 277, 297, 341, 342, 343, 401, 428, 451, 461, 534, 555, 564, 571, 629, 684, 686, 687, 710, 726, 734, 735, 822, 823, 860, 868, 881, 911, 912, 913, 934, 935, 936, 946, 956, 957, 977, 1071, 1099, 1152, 1169, 1224, 1225, 1327, 1329, 1353, 1368, 1371, 1393, 1397, 1420, 1445, 1449, 1510, 1540, 1541, 1542, 1572, 1573, 1645, 1654, 1690, 1700, 1768, 1769, 1772, 1776, 1797, 1809, 1810, 1825, 1854, 1855, 1859, 1862, 2035, 2042, 2094, 2129, 2225, 2266, 2270, 2310, 2323, 2327, 2401, 2462, 2529, 2530, 2531, 2569, 2607, 2608, 2609, 2610, 2620, 2621, 2622, 2630, 2631, 2632, 2665, 2671, 2677, 2691, 2693, 2694, 2705, 2717, 2718, 2732, 2765, 2774, 2791, 2825, 2862, 2900, 2947, 2950, 2974, 2975, 2976, 3001, 3006, 3022, 3068, 3069, 3077, 3079, 3091, 3100, 3126, 3134, 3153, 3160, 3162, 3173, 3203, 3218, 3229, 3241, 3286, 3308, 3320, 3321, 3322, 3326, 3327, 3358, 3367, 3375, 3376, 3434, 3466, 3474, 3476, 3484, 3508, 3527, 3538, 3577, 3579, 3596, 3619, 36

In [14]:
# Flag table: one row per mismatch identifier and one still empty column per
# flag.
column_names = [
    "mismatch_ID",
    "conserved",
    "one_hum_multiple_prim",
    "non_canonical_hum_spl",
    "N_in_genomic",
    "small_introns",
    "repeats_prot",
    "alignement_error",
    "human_isoform_exist",
]
df = pd.DataFrame(columns=column_names).assign(mismatch_ID=mismatch["mismatch_ID"])

In [45]:
# Fill every flag column with 1 when the mismatch identifier belongs to the
# matching list of flagged mismatches and with 0 otherwise.
flag_sources = {
    'conserved': index_conserved,
    'one_hum_multiple_prim': index_multiple_exon,
    'non_canonical_hum_spl': index_non_canon,
    'N_in_genomic': index_genom_n,
    'small_introns': index_intron_small,
    'repeats_prot': index_repeats,
    'alignement_error': index_align_error,
}
for flag_name, flagged_ids in flag_sources.items():
    df[flag_name] = np.where(df["mismatch_ID"].isin(flagged_ids), 1, 0)
# The human_isoform_exist flag and the export to the mismatch_flag table are
# disabled:
#df['human_isoform_exist'] = np.where(df["mismatch_ID"].isin(index_conserved), 1, 0)
#df.to_sql(con=conn, name='mismatch_flag', index=False, if_exists="append")


In [53]:
# Rows of the mismatch_flag table for which none of the seven flags is set.
unflagged_sql = """SELECT * FROM mismatch_flag 
WHERE conserved = 0 
AND one_hum_multiple_prim = 0 
AND non_canonical_hum_spl = 0
AND N_in_genomic = 0
AND small_introns = 0
AND repeats_prot = 0
AND alignement_error = 0
"""
tata = pd.read_sql_query(sql=unflagged_sql, con=conn)
tata

In [55]:
# Display the mismatch table.
mismatch

Unnamed: 0,mismatch_ID,prot_hum,prot_prim,pos_start_prim,pos_stop_prim,pos_start_hum,pos_stop_hum,exon_start_prim,exon_stop_prim,exon_start_hum,exon_stop_hum,seq_prim,seq_hum
0,0,A0A044PY82,H0WKS4,1,55,0,50,1,3,1,2,AGNPAQPSPGRESLARSKAAPGAPSGLSCAQQGCRGRRPALPLGGR...,MAPQRAGPGLGKPSSKKSRALRGGPGVPRARQRSPRAAAELNTEVP...
1,1,A0A044PY82,H0WKS4,108,119,110,122,3,3,2,2,KISSGSSLETPQ,PKTSSSRSGSEAP
2,2,A0A044PY82,G3QIT3,92,111,91,119,2,3,2,2,GPSDRSAKGGGPGAASPSSP,PGPSEAQRAEVQGQHPPPGPKTSSSRSGS
3,3,A0A075B6I0,A0A0D9RMQ9,74,97,78,117,2,2,2,2,CPSGVPESLLWLHPWEQSCPHHHG,TRSSGVPDRFSGSILGNKAALTITGAQADDESDYYCVLYM
4,4,A0A075B6I3,A0A096NSB3,36,51,35,50,2,2,2,2,ASLTCTFSGGINVAGY,TARLPCTLSSDLSVGG
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19007,19007,Q6UXU4,F7ILG0,4,37,0,30,2,2,1,1,ELAKAFSSQRTRLSAILSMLSLSFSTISLFSNYW,MKTSRRGRALLAVALNLLALLFATTAFLTTH
19008,19008,Q6UXU4,F7ILG0,44,77,37,83,2,2,1,1,VPKPLCEKDLAAKCFDMPVSLDGDVANTSTQEVV,RVPKPGCGQGGRANCPNSGANATANGTAAPAAAAAAATASGNGPPGG
19009,19009,Q6UXU4,F7ILG0,115,245,121,273,3,5,2,5,FIELTPPTERGEKGLLEFATLQGPCHPTLRFGGKQLMEKASLPYPP...,SFIDLAPASEKGVLWLSVVSEVLYILLLVVGFSLMCLELFHSSNVI...
19010,19010,Q6UXU4,F7ILG0,247,286,276,327,5,6,5,7,LELRLGLLHGLAFLHLLHGVGGHHLQYVHQDGAGVQVQA,RMEKRDGSEEDFHLDCRHERYPARHQPHMADSWPRSSAQEAPELNR...
