In [1]:
import pandas as pd
import numpy as np
import os
from collections import Counter
import re

# Preprocessing mutations.tsv

In [11]:
# Read the raw mutation export as a tab-delimited table.
# keep_default_na=False stops pandas from converting empty / "NA"-like cells to NaN,
# so every cell stays a plain string for the .str filters below.
fileName = r'../data/raw/mutations.tsv'
df = pd.read_csv(fileName, delimiter='\t', keep_default_na=False)
print(df.shape)
df.head()

(58250, 15)


Unnamed: 0,#Feature AC,Feature short label,Feature range(s),Original sequence,Resulting sequence,Feature type,Feature annotation,Affected protein AC,Affected protein symbol,Affected protein full name,Affected protein organism,Interaction participants,PubMedID,Figure legend,Interaction AC
0,EBI-6915452,p.[Leu83Ala;Leu87Ala;Leu91Ala],83-83,L,A,mutation(MI:0118),,uniprotkb:P03243-1,p03243-1,,28285 - Human adenovirus C serotype 5 (HAdV-5),"uniprotkb:P03243-1(protein(MI:0326), 28285 - H...",20639899,Fig. 1d,EBI-2941418
1,EBI-6915452,p.[Leu83Ala;Leu87Ala;Leu91Ala],87-87,L,A,mutation(MI:0118),,uniprotkb:P03243-1,p03243-1,,28285 - Human adenovirus C serotype 5 (HAdV-5),"uniprotkb:P03243-1(protein(MI:0326), 28285 - H...",20639899,Fig. 1d,EBI-2941418
2,EBI-6915452,p.[Leu83Ala;Leu87Ala;Leu91Ala],91-91,L,A,mutation(MI:0118),,uniprotkb:P03243-1,p03243-1,,28285 - Human adenovirus C serotype 5 (HAdV-5),"uniprotkb:P03243-1(protein(MI:0326), 28285 - H...",20639899,Fig. 1d,EBI-2941418
3,EBI-6925687,p.Cys169Ser,169-169,C,S,mutation(MI:0118),,uniprotkb:P0A6H1,clpX,,83333 - Escherichia coli (strain K12),"uniprotkb:P0A6H1(protein(MI:0326), 83333 - Esc...",23622246,Supp Fig. 2A,EBI-6925660
4,EBI-6898360,p.Phe508del,508-508,F,.,mutation(MI:0118),,uniprotkb:P13569,CFTR,,9606 - Homo sapiens,"uniprotkb:P13569(protein(MI:0326), 9606 - Homo...",22038833,"1B, 4B",EBI-6898336


In [12]:
# Drop high-throughput entries (flagged in the 'Feature annotation' column).
is_high_throughput = df['Feature annotation'].str.contains('high-throughput')
df = df[~is_high_throughput]
df.shape

(47222, 15)

## drop entries with more than 2 participants, and entries whose number of participants does not match the number of UniProt accessions found (keeps only binary protein-protein interactions)

In [13]:
# Accessions are the text between 'uniprotkb:' and the next '(' of each participant.
p = re.compile(r'uniprotkb:(.*?)[(]', re.S)

participants = df['Interaction participants']
# assign() builds a new frame, so nothing is written into the filtered slice
# produced by the previous cell (avoids SettingWithCopyWarning).
df = df.assign(
    # every UniProt accession found in the participants string
    partners=participants.apply(p.findall),
    # participants are ';'-separated, so the count is separators + 1
    n_partner=participants.str.count(';') + 1,
)

# Keep interactions with at most two participants.
df = df[df['n_partner'] < 3]
print('after delete items with more than 2 partners {}'.format(df.shape))
# Keep rows where every participant yielded a UniProt accession.
df = df[df['partners'].str.len() == df['n_partner']]
print('after delete items with not identical number of partners and n_partner {}'.format(df.shape))

after delete items with more than 2 partners (43126, 17)
after delete items with not identical number of partners and n_partner (35267, 17)


## drop entries with same interactionAC but different affected protein AC (drop same interaction with multiple mutations)

In [14]:
# df1: rows whose (affected protein, interaction) pair occurs more than once.
same_pair = df.duplicated(['Affected protein AC', 'Interaction AC'], keep=False)
df1 = df[same_pair]
# df2: rows whose interaction AC occurs exactly once in the table.
repeated_interaction = df.duplicated(['Interaction AC'], keep=False)
df2 = df[~repeated_interaction]
df = pd.concat([df1, df2])
print(df.shape)

(34712, 17)


## drop entries without uniprotAC

In [15]:
# Keep only rows whose affected protein is given as a UniProt accession.
has_uniprot = df['Affected protein AC'].str.contains('uniprotkb:', na=False)
df = df[has_uniprot]
print(df.shape)

(34696, 17)


## simplify uniprotkb label

In [16]:
# Strip the 'uniprotkb:' prefix so the column holds bare accessions.
bare_ac = df['Affected protein AC'].str.replace('uniprotkb:', '')
df = df.assign(**{'Affected protein AC': bare_ac})
df.head()

Unnamed: 0,#Feature AC,Feature short label,Feature range(s),Original sequence,Resulting sequence,Feature type,Feature annotation,Affected protein AC,Affected protein symbol,Affected protein full name,Affected protein organism,Interaction participants,PubMedID,Figure legend,Interaction AC,partners,n_partner
0,EBI-6915452,p.[Leu83Ala;Leu87Ala;Leu91Ala],83-83,L,A,mutation(MI:0118),,P03243-1,p03243-1,,28285 - Human adenovirus C serotype 5 (HAdV-5),"uniprotkb:P03243-1(protein(MI:0326), 28285 - H...",20639899,Fig. 1d,EBI-2941418,"[P03243-1, F1M589]",2
1,EBI-6915452,p.[Leu83Ala;Leu87Ala;Leu91Ala],87-87,L,A,mutation(MI:0118),,P03243-1,p03243-1,,28285 - Human adenovirus C serotype 5 (HAdV-5),"uniprotkb:P03243-1(protein(MI:0326), 28285 - H...",20639899,Fig. 1d,EBI-2941418,"[P03243-1, F1M589]",2
2,EBI-6915452,p.[Leu83Ala;Leu87Ala;Leu91Ala],91-91,L,A,mutation(MI:0118),,P03243-1,p03243-1,,28285 - Human adenovirus C serotype 5 (HAdV-5),"uniprotkb:P03243-1(protein(MI:0326), 28285 - H...",20639899,Fig. 1d,EBI-2941418,"[P03243-1, F1M589]",2
5,EBI-6925862,p.Cys169Ser,169-169,C,S,mutation(MI:0118),,P0A6H1,clpX,,83333 - Escherichia coli (strain K12),"uniprotkb:P0A6H1(protein(MI:0326), 83333 - Esc...",23622246,Supp Fig. 2C,EBI-6925855,"[P0A6H1, P0A6H1]",2
13,EBI-8875481,p.Ile204Tyr,204-204,I,Y,mutation(MI:0118),,Q8TE30,q8te30_human,,9606 - Homo sapiens,"uniprotkb:Q8TE30(protein(MI:0326), 9606 - Homo...",24267889,"Fig. 6F, Supp. Fig. 7H",EBI-8875425,"[Q8TE30, Q9UN81]",2


## delete 'mutation' feature type

In [17]:
# Remove rows typed as the generic 'mutation(MI:0118)'; the typed effects
# (disrupting, decreasing, ...) that remain are what the label cell reads.
df = df[df['Feature type'] != 'mutation(MI:0118)']
print(df.shape)

(31792, 17)


## delete every feature (all rows sharing its feature AC) whose resulting sequence contains a non-standard amino acid code (B, J, O, Z)

In [18]:
# Feature ACs having at least one row whose resulting sequence contains B, J, O or Z.
has_odd_residue = df['Resulting sequence'].str.contains('B|J|O|Z', na=False)
f_ = df.loc[has_odd_residue, '#Feature AC'].tolist()
# Drop every row of those features, not only the offending row.
in_bad_feature = df['#Feature AC'].isin(f_)
df = df[~in_bad_feature]
print(df.shape)
print(f_)

(31791, 17)
['EBI-8291032']


## delete 'PRO_' uniprotAC in table

In [19]:
# Remove rows whose affected-protein accession contains 'PRO_'.
is_pro_entry = df['Affected protein AC'].str.contains('PRO_')
df = df[~is_pro_entry]
df.shape

(31488, 17)

## collect all accessions whose sequences are needed from UniProt (input for the UniProt FASTA retrieval at https://www.uniprot.org/uploadlists/)

In [21]:
def flatlist(acList):
    """Concatenate the items of every sub-iterable of acList into one flat list."""
    flat = []
    for sublist in acList:
        flat.extend(sublist)
    return flat

# Every accession that appears as an interaction partner ...
ac1 = {acc for group in df['partners'] for acc in group}
# ... or as an affected protein.
ac2 = set(df['Affected protein AC'])
acAll = ac1.union(ac2)
# One accession per line.
with open('acAll.txt', 'w') as f:
    f.writelines(x + '\n' for x in acAll)


In [22]:
# Drop accessions containing 'PRO_'; acAll becomes a list here.
p = re.compile('PRO_')
acAll = [x for x in acAll if p.search(x) is None]
len(acAll)

7574

In [23]:
# Parse the FASTA download into three parallel lists, one entry per record:
#   ac   - accession: the text between the first and second '|' of the header
#   info - the complete header line
#   seq  - all residue lines of the record joined into one string
ac = []
info = []
seq = []
# NOTE(review): this path is '../../data/...' while mutations.tsv is read from '../data/...' - confirm.
fastaFile = '../../data/raw/allAC.fasta' # from uniprot website mapping, download with canonical and isoform
with open(fastaFile, 'r') as f:
    chunks = None  # residue lines of the record being read; None until the first header
    for line in f:
        line = line.strip()
        # A header is a line that STARTS with '>' (a substring test would also
        # match any other line that merely contains the character).
        if line.startswith('>'):
            if chunks is not None:
                seq.append(''.join(chunks))
            chunks = []
            # Take exactly the second '|'-delimited field, so a '|' later in the
            # description cannot leak into the accession.
            ac.append(line.split('|')[1])
            info.append(line)
        elif chunks is not None:
            chunks.append(line)
    # Flush the last record; skipped for a file without headers so the three
    # lists always have equal length.
    if chunks is not None:
        seq.append(''.join(chunks))
fastaTable = pd.DataFrame({'ac': ac, 'info': info, 'seq': seq})

## select valid uniprotAC to make following selection

In [24]:
# validAC1: FASTA records whose accession is used verbatim in the table.
listed = fastaTable['ac'].isin(acAll)
validAC1 = fastaTable[listed]

# validAC2: remaining records whose accession equals the part before the first
# '-' of some accession in acAll.
acAll_series = pd.Series(list(acAll))
base_ac = acAll_series.str.split('-', expand=True)[0]
validAC2 = fastaTable[~listed]
validAC2 = validAC2[validAC2['ac'].isin(base_ac)]

validAC = pd.concat([validAC1, validAC2])
for part in (validAC1, validAC2, validAC):
    print(part.shape)

(7208, 3)
(327, 3)
(7535, 3)


## make the 'affected protein AC' - 'uniprotAC' dict. Some have 'multiple key' -> 'single value' relationship eg: apac['P19838-1'] = 'P19838', apac['P19838'] = 'P19838'

In [25]:
# Map every accession used in the table to the accession under which its
# sequence is stored in validAC: the accession itself when it is present,
# otherwise the part before its first '-'. Accessions matching neither are
# left out of the mapping.
validSet = set(validAC['ac'])  # set lookup instead of scanning the column per accession
apac2ac = {}
# The loop variable is deliberately not called `ac`, so the `ac` list built by
# the FASTA cell is not overwritten.
for acc in acAll:
    base = acc.split('-')[0]
    if acc in validSet:
        apac2ac[acc] = acc
    elif base in validSet:
        apac2ac[acc] = base
apacKey = list(apac2ac)
acValue = list(apac2ac.values())


### transform all isoform AC in table into real uniprotAC(canonical with no isoform '-'), eg: O43889-2 ->O43889, O43889-3 -> O43889-3

In [26]:
# Keep rows whose affected protein has an entry in apac2ac.
affected_is_mapped = df['Affected protein AC'].isin(apac2ac.keys())
df = df[affected_is_mapped]

In [27]:
# Keep rows whose partners all have an entry in apac2ac.
# The key set is built once rather than once per row, and the test is a plain
# subset check (a strict-subset test would reject a row whose partners happen
# to be exactly the full key set).
knownAC = set(apac2ac)
df = df[df['partners'].apply(lambda x: knownAC.issuperset(x))]

## merge the rows of a multi-position mutation (same feature AC) into one row

In [None]:
# Collapse all rows of one '#Feature AC' into a single row.
# The three per-position columns become lists (one element per original row);
# every other column keeps the value of the feature's first row.
listCol = ['Feature range(s)', 'Original sequence', 'Resulting sequence']
comCol = [c for c in df.columns if c not in listCol]

# Select several columns with a list: the bare tuple form
# groupby(...)['a', 'b', 'c'] is rejected by current pandas.
df_1 = df.groupby('#Feature AC')[listCol].agg(list)
df_2 = df[comCol].drop_duplicates('#Feature AC', keep='first')
df = pd.merge(df_1, df_2, on='#Feature AC').reset_index(drop=True)
df.shape

In [29]:
df.head()

Unnamed: 0,#Feature AC,Feature range(s),Original sequence,Resulting sequence,Feature short label,Feature type,Feature annotation,Affected protein AC,Affected protein symbol,Affected protein full name,Affected protein organism,Interaction participants,PubMedID,Figure legend,Interaction AC,partners,n_partner
0,EBI-10039489,[81-81],[V],[E],p.Val81Glu,mutation disrupting(MI:0573),,P28795,PEX3,,559292 - Saccharomyces cerevisiae,"uniprotkb:Q03694(protein(MI:0326), 559292 - Sa...",23900285,f1c,EBI-10039307,"[Q03694, P28795]",2
1,EBI-10039495,[188-188],[N],[I],p.Asn188Ile,mutation decreasing(MI:0119),,P28795,PEX3,,559292 - Saccharomyces cerevisiae,"uniprotkb:Q03694(protein(MI:0326), 559292 - Sa...",23900285,f1c,EBI-10039491,"[Q03694, P28795]",2
2,EBI-10039551,[81-81],[V],[E],p.Val81Glu,mutation disrupting(MI:0573),,P28795,PEX3,,559292 - Saccharomyces cerevisiae,"uniprotkb:P28795(protein(MI:0326), 559292 - Sa...",23900285,f2a f2b,EBI-10039532,"[P28795, Q03694]",2
3,EBI-10039706,[81-81],[V],[E],p.Val81Glu,mutation disrupting(MI:0573),,P28795,PEX3,,559292 - Saccharomyces cerevisiae,"uniprotkb:P28795(protein(MI:0326), 559292 - Sa...",23900285,f2c,EBI-10039697,"[P28795, Q03694]",2
4,EBI-10039722,[81-81],[V],[E],p.Val81Glu,mutation disrupting(MI:0573),,P28795,PEX3,,559292 - Saccharomyces cerevisiae,"uniprotkb:P28795(protein(MI:0326), 559292 - Sa...",23900285,f2d,EBI-10039716,"[P28795, Q03694]",2


## make mutprotein seq and participants seq

In [30]:
# Sequence table keyed by accession, for .loc[accession, 'seq'] lookups.
# set_index returns a new frame, so validAC itself is left unchanged.
validAC_index = validAC.set_index('ac')

In [31]:
# Accession under which each affected protein's sequence is stored ...
mutAC = [apac2ac[affected] for affected in df['Affected protein AC']]
# ... and that stored (unmutated) sequence.
mut0 = [validAC_index.loc[acc, 'seq'] for acc in mutAC]

In [32]:
# For every row collect the partner of the affected protein:
#   par   - the partner's sequence
#   parAC - the partner's accession after mapping through apac2ac
# With two partners, the first one equal to the affected protein is skipped and
# every other one is recorded (so when both equal it, the second is recorded).
# With a single partner, that partner is recorded.
par = []
parAC = []

for affected, partners in zip(df['Affected protein AC'], df['partners']):
    if len(partners) > 1:
        skipped_self = False
        for acc in partners:
            if acc == affected and not skipped_self:
                skipped_self = True
                continue
            par.append(validAC_index.loc[apac2ac[acc], 'seq'])
            parAC.append(apac2ac[acc])
    elif len(partners) == 1:
        only = partners[0]
        par.append(validAC_index.loc[apac2ac[only], 'seq'])
        parAC.append(apac2ac[only])
# The two numbers printed below must agree for the column assignment that follows.
print(len(par))
print(df.shape)

26861
(26861, 17)


In [33]:
# Attach the looked-up accessions and sequences (same column order as before).
df = df.assign(mutAC=mutAC, mut0=mut0, parAC=parAC, par0=par)

In [34]:
# Build the mutated sequence of every row by applying each listed substitution
# to the sequence in 'mut0'. Ranges are 'start-end' strings used as 1-based,
# inclusive positions.
seq_cols = ['mut0', 'Affected protein AC', 'Feature range(s)',
            'Original sequence', 'Resulting sequence']
mut1 = []
for wild, affected, ranges, originals, results in zip(*(df[c] for c in seq_cols)):
    mutated = wild
    for k, span in enumerate(ranges):
        bounds = span.split('-')
        pos0 = int(bounds[0])
        pos1 = int(bounds[1])
        ori = originals[k]
        mut = results[k]
        if mutated[(pos0 - 1): pos1] == ori:
            mutated = mutated[:(pos0 - 1)] + mut + mutated[pos1:]
        else:
            # The stored sequence does not carry the stated original residues here:
            # report the protein and leave this substitution out.
            # NOTE(review): the row is still kept, with whatever substitutions did apply.
            print(affected)
    # '.' is removed from the finished sequence (the preview shows a deletion's
    # resulting sequence as '.').
    mut1.append(mutated.replace('.', ''))
print(len(mut1))

26861


In [35]:
# Attach the mutated sequences.
df = df.assign(mut1=mut1)

In [36]:
# Numeric label derived from the feature type. Rows matching no keyword keep 2;
# keywords are applied in this order, so a later match overrides an earlier one.
df['label'] = 2
for keyword, code in (('disrupting', 0), ('decreasing', 1), ('increasing', 3), ('causing', 4)):
    matches = df['Feature type'].str.contains(keyword)
    df.loc[matches, 'label'] = code

In [38]:
# Build the name '<accession>_<short label>' (used later as a PSSM file stem),
# e.g. 'P28795' + 'p.Val81Glu' -> 'P28795_Val81Glu'.
df['mutAC1'] = df['mutAC'] + '_' + df['Feature short label']
# regex=False makes every replacement literal. Without it the result depends on
# the pandas version, because '.' and '[' are regex metacharacters.
for old, new in (('_p.', '_'), ('[', '-'), (']', '-'), (';', '_')):
    df['mutAC1'] = df['mutAC1'].str.replace(old, new, regex=False)

In [39]:
df['Feature type'].value_counts()

mutation with no effect(MI:2226)         8121
mutation disrupting strength(MI:1128)    5084
mutation disrupting(MI:0573)             4396
mutation decreasing(MI:0119)             4194
mutation decreasing strength(MI:1133)    2345
mutation increasing(MI:0382)             1113
mutation increasing strength(MI:1132)     615
mutation disrupting rate(MI:1129)         352
mutation decreasing rate(MI:1130)         277
mutation causing(MI:2227)                 222
mutation increasing rate(MI:1131)         142
Name: Feature type, dtype: int64

In [40]:
df['label'].value_counts()

0    9832
2    8121
1    6816
3    1870
4     222
Name: label, dtype: int64

In [41]:
df.head()

Unnamed: 0,#Feature AC,Feature range(s),Original sequence,Resulting sequence,Feature short label,Feature type,Feature annotation,Affected protein AC,Affected protein symbol,Affected protein full name,...,Interaction AC,partners,n_partner,mutAC,mut0,parAC,par0,mut1,label,mutAC1
0,EBI-10039489,[81-81],[V],[E],p.Val81Glu,mutation disrupting(MI:0573),,P28795,PEX3,,...,EBI-10039307,"[Q03694, P28795]",2,P28795,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,Q03694,MVLSRGETKKNSVRLTAKQEKKPQSTFQTLKQSLKLSNNKKLKQDS...,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,0,P28795_Val81Glu
1,EBI-10039495,[188-188],[N],[I],p.Asn188Ile,mutation decreasing(MI:0119),,P28795,PEX3,,...,EBI-10039491,"[Q03694, P28795]",2,P28795,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,Q03694,MVLSRGETKKNSVRLTAKQEKKPQSTFQTLKQSLKLSNNKKLKQDS...,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,1,P28795_Asn188Ile
2,EBI-10039551,[81-81],[V],[E],p.Val81Glu,mutation disrupting(MI:0573),,P28795,PEX3,,...,EBI-10039532,"[P28795, Q03694]",2,P28795,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,Q03694,MVLSRGETKKNSVRLTAKQEKKPQSTFQTLKQSLKLSNNKKLKQDS...,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,0,P28795_Val81Glu
3,EBI-10039706,[81-81],[V],[E],p.Val81Glu,mutation disrupting(MI:0573),,P28795,PEX3,,...,EBI-10039697,"[P28795, Q03694]",2,P28795,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,Q03694,MVLSRGETKKNSVRLTAKQEKKPQSTFQTLKQSLKLSNNKKLKQDS...,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,0,P28795_Val81Glu
4,EBI-10039722,[81-81],[V],[E],p.Val81Glu,mutation disrupting(MI:0573),,P28795,PEX3,,...,EBI-10039716,"[P28795, Q03694]",2,P28795,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,Q03694,MVLSRGETKKNSVRLTAKQEKKPQSTFQTLKQSLKLSNNKKLKQDS...,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,0,P28795_Val81Glu


## delete items whose mutated protein and partner are the same protein (it is unknown whether the mutated or the wild-type copy takes part in the interaction)

In [42]:
# Keep only rows where the partner differs from the mutated protein.
df = df[df['mutAC'] != df['parAC']]
print('after drop same participants items: {}'.format(df.shape))

after drop same participants items: (23860, 24)


## drop sequences containing non-standard amino acid codes

In [43]:
# Drop rows in which any of the three sequences contains B, J, O, U, X or Z.
for seq_col in ('mut0', 'mut1', 'par0'):
    has_odd_code = df[seq_col].str.contains('B|J|O|U|X|Z')
    df = df[~has_odd_code]
print('after drop unregular aa: {}'.format(df.shape))

after drop unregular aa: (23819, 24)


## step for add PSSM(full size) into table

In [39]:
def load_file(file_path):
    """Read a text file and return its lines.

    Returns the list produced by ``readlines()``. When the file cannot be
    opened or read (IOError) a message is printed and the string ``'nan'`` is
    returned instead.
    """
    try:
        with open(file_path) as f_in:
            return f_in.readlines()
    except IOError:
        print('Can not open file: ' + file_path)
        return 'nan'


def parse_pssm(filelines, winsize=51, pssm_root=None, mutated_pos=None, most1024=False):
    """Load ``<pssm_root>/<filelines>.pssm`` as an (n, 20) float array.

    Only lines that split into exactly 44 whitespace-separated fields are used;
    fields 2..21 of each such line form one row (presumably the 20 per-residue
    scores of a PSI-BLAST ASCII PSSM - confirm against the files).

    Parameters
    ----------
    filelines : str
        File stem; ``'.pssm'`` is appended.
    winsize : int
        Number of rows returned when ``mutated_pos`` is given.
    pssm_root : str or None
        Directory containing the file; ``None`` means the current directory.
    mutated_pos : int or None
        1-based row to centre the window on. ``None`` returns the whole matrix.
    most1024 : bool
        If true, matrices longer than 1024 rows are cut to the first 1024.

    Returns
    -------
    numpy.ndarray or str
        The matrix, zero-padded at the end to at least 1024 rows, or the
        ``winsize``-row window around ``mutated_pos``. The string ``'nan'`` is
        returned when the file cannot be read.
    """
    lines = load_file(os.path.join(pssm_root or '', filelines + '.pssm'))
    if lines == 'nan':
        return 'nan'

    # Collect the rows first and convert once, instead of re-allocating the
    # array for every line.
    rows = []
    for line in lines:
        fields = line.split()
        if len(fields) == 44:
            rows.append(fields[2:22])
    pssmvalue = np.array(rows).astype(float).reshape(-1, 20)

    # Zero-pad short matrices at the end up to 1024 rows.
    if pssmvalue.shape[0] < 1024:
        pssmvalue = np.vstack([pssmvalue, np.zeros([1024 - pssmvalue.shape[0], 20])])
    if most1024 and pssmvalue.shape[0] > 1024:
        pssmvalue = pssmvalue[:1024, :]

    if mutated_pos is not None:
        # Pad winsize // 2 zero rows on both sides so a window centred near either
        # end is still complete; original row k then sits at padded index k + half.
        half = winsize // 2
        flank = np.zeros([half, 20])
        pssmvalue = np.vstack([flank, pssmvalue, flank])
        start = mutated_pos - 1
        pssmvalue = pssmvalue[start: start + winsize, :]

    return pssmvalue

## pssm_root holds the PSSM files produced by the PSI-BLAST batch jobs

In [40]:
# Directories holding the '<name>.pssm' files read by parse_pssm.
# These are absolute paths from the original environment - change them to your own directories.
# mut_pssm_root is searched with 'mutAC1' names; ori_pssm_root with 'mutAC' / 'parAC' accessions.
mut_pssm_root = '/lustre/home/acct-bmelgn/bmelgn-2/QianWei/app/psipred_file/psipred/BLAST+/v20200727/pssm'
ori_pssm_root = '/lustre/home/acct-bmelgn/bmelgn-2/QianWei/app/psipred_file/psipred/BLAST+/v20200731/pssm'

In [None]:
# Full-length PSSM of each partner protein, looked up by accession.
pssm_par0 = [parse_pssm(x, pssm_root=ori_pssm_root) for x in df['parAC']]
df['pssm_par0'] = pssm_par0
# parse_pssm returns the string 'nan' when the .pssm file could not be read.
# Test each cell for that sentinel explicitly: comparing a column of arrays with
# == 'nan' makes pandas take the truth value of an array comparison, which is
# ambiguous on current NumPy.
pssm_missing = df['pssm_par0'].map(lambda v: isinstance(v, str) and v == 'nan').astype(bool)
df = df[~pssm_missing]
print('after pssm_par0: {}'.format(df.shape))

## select only single-point items to support the sliding-window strategy (no length restriction on the mutated protein is needed)

In [44]:
# Work on a copy so df keeps the multi-point items.
df_s = df.copy()
print('df_copy shape: {}'.format(df_s.shape))

# A row is multi-point when its list of feature ranges has more than one entry.
is_multi_point = df_s['Feature range(s)'].str.len() > 1
df_s = df_s[~is_multi_point]
print('after drop multi-point mutation items: {}'.format(df_s.shape))

df_copy shape: (23819, 24)
after drop multi-point mutation items: (21390, 24)


In [45]:
# Cut a 51-character window centred on the mutated position out of the
# wild-type (mut0) and mutated (mut1) sequences.
# Both sequences get 51 '0' characters on each side, so position `pos`
# (1-based) sits at padded index pos + 50 and the window is [pos + 25, pos + 76).
pad = '0' * 51
mut0_win = pad + df_s['mut0'] + pad
mut1_win = pad + df_s['mut1'] + pad
df_s['mut0_pad'] = mut0_win
df_s['mut1_pad'] = mut1_win

mut0_51 = []
mut1_51 = []
for wild_padded, mutant_padded, ranges in zip(df_s['mut0_pad'], df_s['mut1_pad'], df_s['Feature range(s)']):
    pos = int(ranges[0].split('-')[0])
    window = slice(pos + 25, pos + 76)
    mut0_51.append(wild_padded[window])
    mut1_51.append(mutant_padded[window])

In [46]:
# Attach the 51-character windows.
df_s = df_s.assign(mut0_51=mut0_51, mut1_51=mut1_51)

In [47]:
# The padded helper columns are no longer needed once the windows are cut.
df_s = df_s.drop(columns=['mut0_pad', 'mut1_pad'])

In [82]:
def _window_pssm(frame, name_col, root):
    """Return, per row of frame, the PSSM window around the row's mutated position.

    The file stem comes from name_col and the centre from the start of the first
    feature range; entries are parse_pssm results (an array, or 'nan' if unreadable).
    """
    return [
        parse_pssm(name, pssm_root=root, mutated_pos=int(ranges[0].split('-')[0]))
        for name, ranges in zip(frame[name_col], frame['Feature range(s)'])
    ]


def _pssm_missing(column):
    """Boolean mask of the cells holding parse_pssm's 'nan' sentinel.

    Checked per element: comparing a column of arrays with == 'nan' makes pandas
    take the truth value of an array comparison, which is ambiguous on current NumPy.
    """
    return column.map(lambda v: isinstance(v, str) and v == 'nan').astype(bool)


# Window PSSM of the mutated protein (files named by mutAC1).
# The PSSM window is only available for single-mutation items.
pssm_win = _window_pssm(df_s, 'mutAC1', mut_pssm_root)
df_s['pssm_win_mut1'] = pssm_win
df_s = df_s[~_pssm_missing(df_s['pssm_win_mut1'])]
print('after pssm_win_mut1: {}'.format(df_s.shape))

# Window PSSM of the wild-type protein (files named by mutAC), computed on the
# rows that survived the previous filter.
pssm_win = _window_pssm(df_s, 'mutAC', ori_pssm_root)
df_s['pssm_win_mut0'] = pssm_win
df_s = df_s[~_pssm_missing(df_s['pssm_win_mut0'])]
print('after pssm_win_mut0: {}'.format(df_s.shape))

after pssm_win_mut1: (21320, 30)
after pssm_win_mut0: (21320, 31)


In [48]:
# drop duplicates and coflicts
df_s = df_s.drop_duplicates(['mut0_51', 'par0', 'mut1_51', 'label'], keep='first')
print('after drop duplicates: {}'.format(df_s.shape))

df_s = df_s.drop_duplicates(['mut0_51', 'par0', 'mut1_51'], keep=False)
print('after drop conflicts: {}'.format(df_s.shape))

after drop duplicates: (17713, 26)
after drop conflicts: (16573, 26)


In [None]:
# Persist the final single-point-mutation table (df_s) as a pandas pickle.
df_s.to_pickle('../data/processed_mutations.dataset')

In [39]:
df_s.head()

Unnamed: 0,#Feature AC,Feature range(s),Original sequence,Resulting sequence,Feature short label,Feature type,Feature annotation,Affected protein AC,Affected protein symbol,Affected protein full name,...,n_partner,mutAC,mut0,parAC,par0,mut1,label,mut0_51,mut1_51,mutAC1
0,EBI-10039489,[81-81],[V],[E],p.Val81Glu,mutation disrupting(MI:0573),,P28795,PEX3,,...,2,P28795,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,Q03694,MVLSRGETKKNSVRLTAKQEKKPQSTFQTLKQSLKLSNNKKLKQDS...,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,1,IKEQIKRRFEQTQEDSLYTIYELLPVWRMVLNENDLNLDSIVTQLK...,IKEQIKRRFEQTQEDSLYTIYELLPEWRMVLNENDLNLDSIVTQLK...,P28795_p.Val81Glu
1,EBI-10039495,[188-188],[N],[I],p.Asn188Ile,mutation decreasing(MI:0119),,P28795,PEX3,,...,2,P28795,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,Q03694,MVLSRGETKKNSVRLTAKQEKKPQSTFQTLKQSLKLSNNKKLKQDS...,MAPNQRSRSLLQRHRGKVLISLTGIAALFTTGSVVVFFVKRWLYKQ...,2,NEYLDSAIKLTMQQENCNKLQNRFYNWVTSWWSDPEDKADDAMVMA...,NEYLDSAIKLTMQQENCNKLQNRFYIWVTSWWSDPEDKADDAMVMA...,P28795_p.Asn188Ile
2,EBI-10041181,[368-368],[W],[A],p.Trp368Ala,mutation decreasing(MI:0119),,Q9UKV5,AMFR,,...,2,Q9UKV5,MPLLFLERFPWPSLRTYTGLSGLALLGTIISAYRALSQPEAGPGEP...,P60604,MAGTALKRLMAEYKQLTLNPPEGIVAGPMNEENFFEWEALIMGPED...,MPLLFLERFPWPSLRTYTGLSGLALLGTIISAYRALSQPEAGPGEP...,2,ICWDSMQAARKLPCGHLFHNSCLRSWLEQDTSCPTCRMSLNIADNN...,ICWDSMQAARKLPCGHLFHNSCLRSALEQDTSCPTCRMSLNIADNN...,Q9UKV5_p.Trp368Ala
3,EBI-10041190,[377-377],[T],[D],p.Thr377Asp,mutation decreasing(MI:0119),,Q9UKV5,AMFR,,...,2,Q9UKV5,MPLLFLERFPWPSLRTYTGLSGLALLGTIISAYRALSQPEAGPGEP...,P60604,MAGTALKRLMAEYKQLTLNPPEGIVAGPMNEENFFEWEALIMGPED...,MPLLFLERFPWPSLRTYTGLSGLALLGTIISAYRALSQPEAGPGEP...,2,RKLPCGHLFHNSCLRSWLEQDTSCPTCRMSLNIADNNRVREEHQGE...,RKLPCGHLFHNSCLRSWLEQDTSCPDCRMSLNIADNNRVREEHQGE...,Q9UKV5_p.Thr377Asp
4,EBI-10041290,[379-379],[R],[E],p.Arg379Glu,mutation decreasing(MI:0119),,Q9UKV5,AMFR,,...,2,Q9UKV5,MPLLFLERFPWPSLRTYTGLSGLALLGTIISAYRALSQPEAGPGEP...,P60604,MAGTALKRLMAEYKQLTLNPPEGIVAGPMNEENFFEWEALIMGPED...,MPLLFLERFPWPSLRTYTGLSGLALLGTIISAYRALSQPEAGPGEP...,2,LPCGHLFHNSCLRSWLEQDTSCPTCRMSLNIADNNRVREEHQGENL...,LPCGHLFHNSCLRSWLEQDTSCPTCEMSLNIADNNRVREEHQGENL...,Q9UKV5_p.Arg379Glu
