In [1]:
import pypdb
import os
import pandas as pd
import pickle
from pypdb.clients.pdb import pdb_client
import tqdm
from tqdm import tqdm

import gzip
import numpy as np
from Bio.PDB import *
from Bio.PDB.Polypeptide import three_to_one, is_aa

In [2]:
## This code for pdb file manipulation is taken is taken from https://github.com/compbiomed-unito/acdc-nn/blob/master/acdc_nn/util.py

def magic_open(path):
    return (gzip.open if path.endswith('.gz') else open)(path, 'rt')

def pdb2seq(pp):
    ''' pdb2seq(pp) takes a pdb_structure_chain 
    and return its sequence '''
    seq = [] # pp.get_sequence()
    reslist = []
    for ppc  in pp:
        reslist += [res for res in ppc]
        seq += [str(ppc.get_sequence())]
    return "".join(seq)

def map_pdb_pos(pp):
    ''' map_pdb_pos
    Returns two dicts seq2pdb[seq_pos], pdb2seq[pdb_pos]'''
    reslist = []
    for ppc  in pp:
        reslist += [res for res in ppc]
    seq2pdb = dict(zip( map(str,range(1,len(reslist)+1)), [str(r.get_id()[1])+r.get_id()[2].strip() for r in reslist]))
    pdb2seq = dict(zip( [str(r.get_id()[1])+r.get_id()[2].strip() for r in reslist], map(str,range(1,len(reslist)+1)) ))
    return seq2pdb, pdb2seq

def pdb2info(pdb_file, chain):
    ''' pdb2info(pdb_file) 
    Returns structure, polypeptide '''
    parser=PDBParser(QUIET=True)
    with magic_open(pdb_file) as f:
        structure = parser.get_structure('X', f)
    pchain=structure[0][chain]
    ppb=PPBuilder()
    pp = ppb.build_peptides(pchain, aa_only=False) #[0]
    return (structure, pchain, pdb2seq(pp), *map_pdb_pos(pp)) 

# S2648

In [3]:
df_S2648 = pd.read_csv('DATA/S2648.csv')

In [42]:
print('Total dataset length', len(df_S2648))
pdb_ids = list(set([t.split()[0].upper() for t in df_S2648.PDB_CHAIN.to_list()]))
print('Total number of different chains in dataset', len(pdb_ids))

Total dataset length 2648
Total number of different chains in dataset 132


In [5]:
for pdb_id in pdb_ids:
    if not os.path.isfile(f"PDB/{pdb_id[:4]}.pdb"):
        with open(f"PDB/{pdb_id[:4]}.pdb", "w") as fh:
            fh.write(pdb_client.get_pdb_file(f"{pdb_id[:4]}", compression=False))

Sending GET request to https://files.rcsb.org/download/1B26.pdb to fetch 1B26's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1A5E.pdb to fetch 1A5E's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1CEY.pdb to fetch 1CEY's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1B8E.pdb to fetch 1B8E's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1IMQ.pdb to fetch 1IMQ's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/3PGK.pdb to fetch 3PGK's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1ARR.pdb to fetch 1ARR's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/2NVH.pdb to fetch 2NVH's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1MBG.pdb to fetch 1MBG's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1N0J.pdb to fetch 1N0J's pd

Sending GET request to https://files.rcsb.org/download/1LVE.pdb to fetch 1LVE's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1KCQ.pdb to fetch 1KCQ's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1DKT.pdb to fetch 1DKT's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/5PTI.pdb to fetch 5PTI's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1CSE.pdb to fetch 1CSE's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1IET.pdb to fetch 1IET's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/2TRX.pdb to fetch 2TRX's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/5CRO.pdb to fetch 5CRO's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1CHK.pdb to fetch 1CHK's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1A43.pdb to fetch 1A43's pd

In [6]:
wt = []
mut = []
ddg = []
pdb_ids = []
mut_infos = []
poss = []

verbatim_pdb_ids = {'1LVEA'}


print('Processing s2648')

for idx in tqdm(range(len(df_S2648))):
    pdb_id = df_S2648.iloc[idx]['PDB_CHAIN'].upper()
    wild_aa = df_S2648.iloc[idx]['WILD_RES']
    pos = str(df_S2648.iloc[idx]['POSITION'])
    mutant_aa = df_S2648.iloc[idx]['MUTANT_RES']
    exp_ddg = df_S2648.iloc[idx]['EXP_DDG']
        
    _, _, sequence, pdb2seq_pos, seq2pdb_pos = pdb2info(f'PDB/{pdb_id[:4]}.pdb', pdb_id[-1])
    
    if pdb_id in verbatim_pdb_ids:
        seq2pdb_pos = {str(i):str(i) for i in range(len(sequence))}
    
    if pos not in seq2pdb_pos:
        print(f'Indexing error for {pdb_id} position {pos} not present in mapping {seq2pdb_pos}')
        
    else:
        if sequence[int(seq2pdb_pos[pos])-1]!=wild_aa:
            print(f'Error for {pdb_id} expected {wild_aa} at position {pos} ')
            print(f'Sequence is {sequence}')
            print(f'Mapping is {seq2pdb_pos}')
        
        else:
            wt.append(sequence)
            tt = list(sequence)
            tt[int(seq2pdb_pos[pos])-1] = mutant_aa
            poss.append(int(seq2pdb_pos[pos])-1)
            mut.append(''.join(tt))
            ddg.append(exp_ddg)
            pdb_ids.append(pdb_id)
            mut_infos.append(str(wild_aa) + pos + str(mutant_aa))

Processing s2648






100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2648/2648 [03:17<00:00, 13.39it/s]


In [7]:
pd.DataFrame({'wt_seq': wt, 
              'mut_seq': mut ,
              'ddg': ddg, 
              'pdb_id': pdb_ids, 
              'mut_info': mut_infos,
              'pos': poss}).to_csv('DATASETS/S2648.csv')

# S3488

In [15]:
df_1744 = pd.read_csv('DATA/Q1744.txt', sep = ' ', names = ['PDB_CHAIN', 'POSITION', 'WILD_RES', 'MUTANT_RES', 'EXP_DDG'])

In [17]:
print('Total dataset length', len(df_1744))
pdb_ids = list(set([t.split()[0].upper() for t in df_1744.PDB_CHAIN.to_list()]))
print('Total number of different chains in dataset', len(pdb_ids))

Total dataset length 1744
Total number of different chains in dataset 127


In [18]:
for pdb_id in pdb_ids:
    if not os.path.isfile(f"PDB/{pdb_id[:4]}.pdb"):
        with open(f"PDB/{pdb_id[:4]}.pdb", "w") as fh:
            fh.write(pdb_client.get_pdb_file(f"{pdb_id[:4]}", compression=False))

Sending GET request to https://files.rcsb.org/download/1rx4.pdb to fetch 1rx4's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/3bls.pdb to fetch 3bls's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1c2r.pdb to fetch 1c2r's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1uzc.pdb to fetch 1uzc's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1ayf.pdb to fetch 1ayf's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/2ci2.pdb to fetch 2ci2's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1ftg.pdb to fetch 1ftg's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1kfw.pdb to fetch 1kfw's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1arr.pdb to fetch 1arr's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1am7.pdb to fetch 1am7's pd

Sending GET request to https://files.rcsb.org/download/1cyo.pdb to fetch 1cyo's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1yea.pdb to fetch 1yea's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1onc.pdb to fetch 1onc's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1ycc.pdb to fetch 1ycc's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/2q98.pdb to fetch 2q98's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1aj3.pdb to fetch 1aj3's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1ten.pdb to fetch 1ten's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1lve.pdb to fetch 1lve's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1chk.pdb to fetch 1chk's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/2afg.pdb to fetch 2afg's pd

In [20]:
wt = []
mut = []
ddg = []
pdb_ids = []
mut_infos = []
poss = []

verbatim_pdb_ids = {'1LVEA'}


print('Processing S3488')

for idx in tqdm(range(len(df_1744))):
    pdb_id = df_1744.iloc[idx]['PDB_CHAIN'].upper()
    wild_aa = df_1744.iloc[idx]['WILD_RES']
    pos = str(df_1744.iloc[idx]['POSITION'])
    mutant_aa = df_1744.iloc[idx]['MUTANT_RES']
    exp_ddg = df_1744.iloc[idx]['EXP_DDG']
        
    _, _, sequence, pdb2seq_pos, seq2pdb_pos = pdb2info(f'PDB/{pdb_id[:4]}.pdb', pdb_id[-1])
    
    if pdb_id in verbatim_pdb_ids:
        seq2pdb_pos = {str(i):str(i) for i in range(len(sequence))}
    
    if pos not in seq2pdb_pos:
        print(f'Indexing error for {pdb_id} position {pos} not present in mapping {seq2pdb_pos}')
        
    else:
        if sequence[int(seq2pdb_pos[pos])-1]!=wild_aa:
            print(f'Error for {pdb_id} expected {wild_aa} at position {pos} ')
            print(f'Sequence is {sequence}')
            print(f'Mapping is {seq2pdb_pos}')
        
        else:
            wt.append(sequence)
            tt = list(sequence)
            tt[int(seq2pdb_pos[pos])-1] = mutant_aa
            poss.append(int(seq2pdb_pos[pos])-1)
            mut.append(''.join(tt))
            ddg.append(exp_ddg)
            pdb_ids.append(pdb_id)
            mut_infos.append(str(wild_aa) + pos + str(mutant_aa))
            
            mut.append(sequence)
            tt = list(sequence)
            tt[int(seq2pdb_pos[pos])-1] = mutant_aa
            poss.append(int(seq2pdb_pos[pos])-1)
            wt.append(''.join(tt))
            ddg.append(-1*exp_ddg)
            pdb_ids.append(pdb_id)
            mut_infos.append(str(wild_aa) + pos + str(mutant_aa))

Processing S3488




100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1744/1744 [02:02<00:00, 14.21it/s]


In [21]:
pd.DataFrame({'wt_seq': wt, 
              'mut_seq': mut ,
              'ddg': ddg, 
              'pdb_id': pdb_ids, 
              'mut_info': mut_infos,
              'pos': poss}).to_csv('DATASETS/S3488.csv')

# S3421

In [69]:
df_3421 = pd.read_csv('DATA/Q3421.txt', sep = '\t', skiprows = 2, index_col=False,
                      names = ['PDB_ID', 'PDB_CHAIN', 'POSITION', 'WILD_RES', 'MUTANT_RES', 'EXP_DDG', 'T', 'PH', 'POS2'])

  df_3421 = pd.read_csv('DATA/Q3421.txt', sep = '\t', skiprows = 2, index_col=False,


In [51]:
print('Total dataset length', len(df_3421))
pdb_ids = list(set([t.split()[0].upper() for t in df_3421.PDB_ID.to_list()]))
print('Total number of different chains in dataset', len(pdb_ids))

Total dataset length 3421
Total number of different chains in dataset 148


In [52]:
for pdb_id in pdb_ids:
    if not os.path.isfile(f"PDB/{pdb_id[:4]}.pdb"):
        with open(f"PDB/{pdb_id[:4]}.pdb", "w") as fh:
            fh.write(pdb_client.get_pdb_file(f"{pdb_id[:4]}", compression=False))

Sending GET request to https://files.rcsb.org/download/1L63.pdb to fetch 1L63's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1BPI.pdb to fetch 1BPI's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1EL1.pdb to fetch 1EL1's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1STN.pdb to fetch 1STN's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1RGG.pdb to fetch 1RGG's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1CYC.pdb to fetch 1CYC's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1TUP.pdb to fetch 1TUP's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1IOB.pdb to fetch 1IOB's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1HFY.pdb to fetch 1HFY's pdb file as a string.


In [53]:
wt = []
mut = []
ddg = []
pdb_ids = []
mut_infos = []
poss = []

verbatim_pdb_ids = {'1LVEA'}


print('Processing S3421')

for idx in tqdm(range(len(df_3421))):
    pdb_id = df_3421.iloc[idx]['PDB_ID'].upper() + df_3421.iloc[idx]['PDB_CHAIN'].upper()
    wild_aa = df_3421.iloc[idx]['WILD_RES']
    pos = str(df_3421.iloc[idx]['POSITION'])
    mutant_aa = df_3421.iloc[idx]['MUTANT_RES']
    exp_ddg = df_3421.iloc[idx]['EXP_DDG']
        
    _, _, sequence, pdb2seq_pos, seq2pdb_pos = pdb2info(f'PDB/{pdb_id[:4]}.pdb', pdb_id[-1])
    
    if pdb_id in verbatim_pdb_ids:
        seq2pdb_pos = {str(i):str(i) for i in range(len(sequence))}
    
    if pos not in seq2pdb_pos:
        print(f'Indexing error for {pdb_id} position {pos} not present in mapping {seq2pdb_pos}')
        
    else:
        if sequence[int(seq2pdb_pos[pos])-1]!=wild_aa:
            print(f'Error for {pdb_id} expected {wild_aa} at position {pos} ')
            print(f'Sequence is {sequence}')
            print(f'Mapping is {seq2pdb_pos}')
        
        else:
            wt.append(sequence)
            tt = list(sequence)
            tt[int(seq2pdb_pos[pos])-1] = mutant_aa
            poss.append(int(seq2pdb_pos[pos])-1)
            mut.append(''.join(tt))
            ddg.append(exp_ddg)
            pdb_ids.append(pdb_id)
            mut_infos.append(str(wild_aa) + pos + str(mutant_aa))

Processing S3421


 31%|████████████████████████████████████████████████▍                                                                                                             | 1048/3421 [01:33<05:53,  6.71it/s]

Error for 1LVEA expected Q at position 89 
Sequence is DIVMTQSPDSLAVSLGERATINCKSSQSVLYSSNSKNYLAWYQQKPGQPPKLLIYWASTRESGVPDRFSGSGSGTDFTLTISSLQAEDVAVYYCQQYYSTPYSFGQGTKLEIKR
Mapping is {'0': '0', '1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66': '66', '67': '67', '68': '68', '69': '69'

 31%|████████████████████████████████████████████████▊                                                                                                             | 1058/3421 [01:33<03:00, 13.11it/s]

Error for 1LVEA expected K at position 30 
Sequence is DIVMTQSPDSLAVSLGERATINCKSSQSVLYSSNSKNYLAWYQQKPGQPPKLLIYWASTRESGVPDRFSGSGSGTDFTLTISSLQAEDVAVYYCQQYYSTPYSFGQGTKLEIKR
Mapping is {'0': '0', '1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66': '66', '67': '67', '68': '68', '69': '69'







100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3421/3421 [03:55<00:00, 14.55it/s]


In [54]:
pd.DataFrame({'wt_seq': wt, 
              'mut_seq': mut ,
              'ddg': ddg, 
              'pdb_id': pdb_ids, 
              'mut_info': mut_infos,
              'pos': poss}).to_csv('DATASETS/S3421.csv')

# ACDC-varibench

In [67]:
df_acdc_varibench = pd.concat([pd.read_csv(os.path.join('DATA/varibench/', f), sep = ' ',
            names = ['PDB_CHAIN', 'MUTATION', 'EXP_DDG']) for f in os.listdir('DATA/varibench/')]).drop_duplicates()

In [71]:
print('Total dataset length', len(df_acdc_varibench))
pdb_ids = list(set([t.split()[0].upper() for t in df_acdc_varibench.PDB_CHAIN.to_list()]))
print('Total number of different chains in dataset', len(pdb_ids))

Total dataset length 1387
Total number of different chains in dataset 78


In [72]:
for pdb_id in pdb_ids:
    if not os.path.isfile(f"PDB/{pdb_id[:4]}.pdb"):
        with open(f"PDB/{pdb_id[:4]}.pdb", "w") as fh:
            fh.write(pdb_client.get_pdb_file(f"{pdb_id[:4]}", compression=False))

Sending GET request to https://files.rcsb.org/download/1LMB.pdb to fetch 1LMB's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1HUE.pdb to fetch 1HUE's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1CDC.pdb to fetch 1CDC's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1FC1.pdb to fetch 1FC1's pdb file as a string.


In [88]:
wt = []
mut = []
ddg = []
pdb_ids = []
mut_infos = []
poss = []

no_verbatim_pdb_ids = {'1C9OA', '1VQBA'}


print('Processing ACDC-varibench')

for idx in tqdm(range(len(df_acdc_varibench))):
    pdb_id = df_acdc_varibench.iloc[idx]['PDB_CHAIN'].upper()
    wild_aa = df_acdc_varibench.iloc[idx]['MUTATION'][0]
    pos = df_acdc_varibench.iloc[idx]['MUTATION'][1:-1]
    mutant_aa = df_acdc_varibench.iloc[idx]['MUTATION'][-1]
    exp_ddg = df_acdc_varibench.iloc[idx]['EXP_DDG']
    
    #if pdb_id!= '1CLWA':
    #    continue
        
    _, _, sequence, pdb2seq_pos, seq2pdb_pos = pdb2info(f'PDB/{pdb_id[:4]}.pdb', pdb_id[-1])
    
    if pdb_id not in no_verbatim_pdb_ids:
        seq2pdb_pos = {str(i):str(i) for i in range(len(sequence))}
    
    if pos not in seq2pdb_pos:
        print(f'Indexing error for {pdb_id} position {pos} not present in mapping {seq2pdb_pos}')
        
    else:
        if sequence[int(seq2pdb_pos[pos])-1]!=wild_aa:
            print(f'Error for {pdb_id} expected {wild_aa} at position {pos} ')
            print(f'Sequence is {sequence}')
            print(f'Mapping is {seq2pdb_pos}')
        
        else:
            wt.append(sequence)
            tt = list(sequence)
            tt[int(seq2pdb_pos[pos])-1] = mutant_aa
            poss.append(int(seq2pdb_pos[pos])-1)
            mut.append(''.join(tt))
            ddg.append(exp_ddg)
            pdb_ids.append(pdb_id)
            mut_infos.append(str(wild_aa) + pos + str(mutant_aa))

Processing S3421


  1%|█▍                                                                                                                                                              | 12/1387 [00:00<00:37, 36.85it/s]

Error for 1AM7A expected H at position 30 
Sequence is MVEINNQRKAFLDMLAWSEGTDNGRQKTRNHGYDVIVGGELFTDYSDHPRKLVTLNPKLKSTGAGRYQLLSRWWDAYRKQLGLKDFSPKSQDAVALQQIKERGALPMIDRGDIRQAIDRCSNIWASLPGAGYGQFEHKADSLIAKFKEAGGTVR
Mapping is {'0': '0', '1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66': 

  1%|█▊                                                                                                                                                              | 16/1387 [00:01<02:06, 10.80it/s]

Error for 1AM7A expected H at position 47 
Sequence is MVEINNQRKAFLDMLAWSEGTDNGRQKTRNHGYDVIVGGELFTDYSDHPRKLVTLNPKLKSTGAGRYQLLSRWWDAYRKQLGLKDFSPKSQDAVALQQIKERGALPMIDRGDIRQAIDRCSNIWASLPGAGYGQFEHKADSLIAKFKEAGGTVR
Mapping is {'0': '0', '1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66': 

 22%|███████████████████████████████████▎                                                                                                                           | 308/1387 [00:14<00:46, 23.24it/s]

Error for 1ONCA expected M at position 22 
Sequence is EDWLTFQKKHITNTRDVDCDNIMSTNLFHCKDKNTFIYSRPEPVKAICKGIIASKNVLTTSEFYLSDCNVTSRPCKYKLKKSTNKFCVTCENQAPVHFVGVGSC
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66': '66', '67': '67', '68': '68', '69': '69', '70': '70', '71': 

 35%|███████████████████████████████████████████████████████▌                                                                                                       | 485/1387 [00:22<00:21, 41.33it/s]

Error for 1STNA expected A at position 55 
Sequence is KLHKEPATLIKAIDGDTVKLMYKGQPMTFRLLLVDTPETKHPKKGVEKYGPEASAFTKKMVENAKKIEVEFDKGQRTDKYGRGLAYIYADGKMVNEALVRQGLAKVAYVYKPNNTHEQHLRKSEAQAKKEKLNIWS
Mapping is {'6': '1', '7': '2', '8': '3', '9': '4', '10': '5', '11': '6', '12': '7', '13': '8', '14': '9', '15': '10', '16': '11', '17': '12', '18': '13', '19': '14', '20': '15', '21': '16', '22': '17', '23': '18', '24': '19', '25': '20', '26': '21', '27': '22', '28': '23', '29': '24', '30': '25', '31': '26', '32': '27', '33': '28', '34': '29', '35': '30', '36': '31', '37': '32', '38': '33', '39': '34', '40': '35', '41': '36', '42': '37', '43': '38', '44': '39', '45': '40', '46': '41', '47': '42', '48': '43', '49': '44', '50': '45', '51': '46', '52': '47', '53': '48', '54': '49', '55': '50', '56': '51', '57': '52', '58': '53', '59': '54', '60': '55', '61': '56', '62': '57', '63': '58', '64': '59', '65': '60', '66': '61', '67': '62', '68': '63', '69': '64', '70': '65', '71': '66', '72': '67', '73':

 35%|████████████████████████████████████████████████████████▏                                                                                                      | 490/1387 [00:22<00:21, 41.19it/s]

Error for 1STNA expected E at position 68 
Sequence is KLHKEPATLIKAIDGDTVKLMYKGQPMTFRLLLVDTPETKHPKKGVEKYGPEASAFTKKMVENAKKIEVEFDKGQRTDKYGRGLAYIYADGKMVNEALVRQGLAKVAYVYKPNNTHEQHLRKSEAQAKKEKLNIWS
Mapping is {'6': '1', '7': '2', '8': '3', '9': '4', '10': '5', '11': '6', '12': '7', '13': '8', '14': '9', '15': '10', '16': '11', '17': '12', '18': '13', '19': '14', '20': '15', '21': '16', '22': '17', '23': '18', '24': '19', '25': '20', '26': '21', '27': '22', '28': '23', '29': '24', '30': '25', '31': '26', '32': '27', '33': '28', '34': '29', '35': '30', '36': '31', '37': '32', '38': '33', '39': '34', '40': '35', '41': '36', '42': '37', '43': '38', '44': '39', '45': '40', '46': '41', '47': '42', '48': '43', '49': '44', '50': '45', '51': '46', '52': '47', '53': '48', '54': '49', '55': '50', '56': '51', '57': '52', '58': '53', '59': '54', '60': '55', '61': '56', '62': '57', '63': '58', '64': '59', '65': '60', '66': '61', '67': '62', '68': '63', '69': '64', '70': '65', '71': '66', '72': '67', '73':

 36%|████████████████████████████████████████████████████████▋                                                                                                      | 495/1387 [00:22<00:31, 28.33it/s]

Error for 1STNA expected F at position 56 
Sequence is KLHKEPATLIKAIDGDTVKLMYKGQPMTFRLLLVDTPETKHPKKGVEKYGPEASAFTKKMVENAKKIEVEFDKGQRTDKYGRGLAYIYADGKMVNEALVRQGLAKVAYVYKPNNTHEQHLRKSEAQAKKEKLNIWS
Mapping is {'6': '1', '7': '2', '8': '3', '9': '4', '10': '5', '11': '6', '12': '7', '13': '8', '14': '9', '15': '10', '16': '11', '17': '12', '18': '13', '19': '14', '20': '15', '21': '16', '22': '17', '23': '18', '24': '19', '25': '20', '26': '21', '27': '22', '28': '23', '29': '24', '30': '25', '31': '26', '32': '27', '33': '28', '34': '29', '35': '30', '36': '31', '37': '32', '38': '33', '39': '34', '40': '35', '41': '36', '42': '37', '43': '38', '44': '39', '45': '40', '46': '41', '47': '42', '48': '43', '49': '44', '50': '45', '51': '46', '52': '47', '53': '48', '54': '49', '55': '50', '56': '51', '57': '52', '58': '53', '59': '54', '60': '55', '61': '56', '62': '57', '63': '58', '64': '59', '65': '60', '66': '61', '67': '62', '68': '63', '69': '64', '70': '65', '71': '66', '72': '67', '73':

 36%|██████████████████████████████████████████████████████████                                                                                                     | 506/1387 [00:22<00:24, 35.59it/s]

Error for 1STNA expected L at position 98 
Sequence is KLHKEPATLIKAIDGDTVKLMYKGQPMTFRLLLVDTPETKHPKKGVEKYGPEASAFTKKMVENAKKIEVEFDKGQRTDKYGRGLAYIYADGKMVNEALVRQGLAKVAYVYKPNNTHEQHLRKSEAQAKKEKLNIWS
Mapping is {'6': '1', '7': '2', '8': '3', '9': '4', '10': '5', '11': '6', '12': '7', '13': '8', '14': '9', '15': '10', '16': '11', '17': '12', '18': '13', '19': '14', '20': '15', '21': '16', '22': '17', '23': '18', '24': '19', '25': '20', '26': '21', '27': '22', '28': '23', '29': '24', '30': '25', '31': '26', '32': '27', '33': '28', '34': '29', '35': '30', '36': '31', '37': '32', '38': '33', '39': '34', '40': '35', '41': '36', '42': '37', '43': '38', '44': '39', '45': '40', '46': '41', '47': '42', '48': '43', '49': '44', '50': '45', '51': '46', '52': '47', '53': '48', '54': '49', '55': '50', '56': '51', '57': '52', '58': '53', '59': '54', '60': '55', '61': '56', '62': '57', '63': '58', '64': '59', '65': '60', '66': '61', '67': '62', '68': '63', '69': '64', '70': '65', '71': '66', '72': '67', '73':

 37%|██████████████████████████████████████████████████████████▌                                                                                                    | 511/1387 [00:22<00:22, 38.22it/s]

Error for 1STNA expected T at position 57 
Sequence is KLHKEPATLIKAIDGDTVKLMYKGQPMTFRLLLVDTPETKHPKKGVEKYGPEASAFTKKMVENAKKIEVEFDKGQRTDKYGRGLAYIYADGKMVNEALVRQGLAKVAYVYKPNNTHEQHLRKSEAQAKKEKLNIWS
Mapping is {'6': '1', '7': '2', '8': '3', '9': '4', '10': '5', '11': '6', '12': '7', '13': '8', '14': '9', '15': '10', '16': '11', '17': '12', '18': '13', '19': '14', '20': '15', '21': '16', '22': '17', '23': '18', '24': '19', '25': '20', '26': '21', '27': '22', '28': '23', '29': '24', '30': '25', '31': '26', '32': '27', '33': '28', '34': '29', '35': '30', '36': '31', '37': '32', '38': '33', '39': '34', '40': '35', '41': '36', '42': '37', '43': '38', '44': '39', '45': '40', '46': '41', '47': '42', '48': '43', '49': '44', '50': '45', '51': '46', '52': '47', '53': '48', '54': '49', '55': '50', '56': '51', '57': '52', '58': '53', '59': '54', '60': '55', '61': '56', '62': '57', '63': '58', '64': '59', '65': '60', '66': '61', '67': '62', '68': '63', '69': '64', '70': '65', '71': '66', '72': '67', '73':

 37%|███████████████████████████████████████████████████████████▌                                                                                                   | 520/1387 [00:23<00:29, 29.48it/s]

Error for 1STNA expected W at position 135 
Sequence is KLHKEPATLIKAIDGDTVKLMYKGQPMTFRLLLVDTPETKHPKKGVEKYGPEASAFTKKMVENAKKIEVEFDKGQRTDKYGRGLAYIYADGKMVNEALVRQGLAKVAYVYKPNNTHEQHLRKSEAQAKKEKLNIWS
Mapping is {'6': '1', '7': '2', '8': '3', '9': '4', '10': '5', '11': '6', '12': '7', '13': '8', '14': '9', '15': '10', '16': '11', '17': '12', '18': '13', '19': '14', '20': '15', '21': '16', '22': '17', '23': '18', '24': '19', '25': '20', '26': '21', '27': '22', '28': '23', '29': '24', '30': '25', '31': '26', '32': '27', '33': '28', '34': '29', '35': '30', '36': '31', '37': '32', '38': '33', '39': '34', '40': '35', '41': '36', '42': '37', '43': '38', '44': '39', '45': '40', '46': '41', '47': '42', '48': '43', '49': '44', '50': '45', '51': '46', '52': '47', '53': '48', '54': '49', '55': '50', '56': '51', '57': '52', '58': '53', '59': '54', '60': '55', '61': '56', '62': '57', '63': '58', '64': '59', '65': '60', '66': '61', '67': '62', '68': '63', '69': '64', '70': '65', '71': '66', '72': '67', '73'

 45%|███████████████████████████████████████████████████████████████████████▊                                                                                       | 626/1387 [00:26<00:18, 41.81it/s]

Indexing error for 1YCCA position 106 not present in mapping {'-5': '1', '-4': '2', '-3': '3', '-2': '4', '-1': '5', '1': '6', '2': '7', '3': '8', '4': '9', '5': '10', '6': '11', '7': '12', '8': '13', '9': '14', '10': '15', '11': '16', '12': '17', '13': '18', '14': '19', '15': '20', '16': '21', '17': '22', '18': '23', '19': '24', '20': '25', '21': '26', '22': '27', '23': '28', '24': '29', '25': '30', '26': '31', '27': '32', '28': '33', '29': '34', '30': '35', '31': '36', '32': '37', '33': '38', '34': '39', '35': '40', '36': '41', '37': '42', '38': '43', '39': '44', '40': '45', '41': '46', '42': '47', '43': '48', '44': '49', '45': '50', '46': '51', '47': '52', '48': '53', '49': '54', '50': '55', '51': '56', '52': '57', '53': '58', '54': '59', '55': '60', '56': '61', '57': '62', '58': '63', '59': '64', '60': '65', '61': '66', '62': '67', '63': '68', '64': '69', '65': '70', '66': '71', '67': '72', '68': '73', '69': '74', '70': '75', '71': '76', '72': '77', '73': '78', '74': '79', '75': '8

 46%|████████████████████████████████████████████████████████████████████████▉                                                                                      | 636/1387 [00:26<00:17, 41.89it/s]

Error for 1YCCA expected N at position 57 
Sequence is TEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHSGQAEGYSYTDANIKKNVLWDENNMSEYLTNPKKYIPGTKMAFGGLKKEKDRNDLITYLKKACE
Mapping is {'-5': '1', '-4': '2', '-3': '3', '-2': '4', '-1': '5', '1': '6', '2': '7', '3': '8', '4': '9', '5': '10', '6': '11', '7': '12', '8': '13', '9': '14', '10': '15', '11': '16', '12': '17', '13': '18', '14': '19', '15': '20', '16': '21', '17': '22', '18': '23', '19': '24', '20': '25', '21': '26', '22': '27', '23': '28', '24': '29', '25': '30', '26': '31', '27': '32', '28': '33', '29': '34', '30': '35', '31': '36', '32': '37', '33': '38', '34': '39', '35': '40', '36': '41', '37': '42', '38': '43', '39': '44', '40': '45', '41': '46', '42': '47', '43': '48', '44': '49', '45': '50', '46': '51', '47': '52', '48': '53', '49': '54', '50': '55', '51': '56', '52': '57', '53': '58', '54': '59', '55': '60', '56': '61', '57': '62', '58': '63', '59': '64', '60': '65', '61': '66', '62': '67', '63': '68', '64': '69', '65': '70', '6

 46%|█████████████████████████████████████████████████████████████████████████▍                                                                                     | 641/1387 [00:26<00:25, 29.09it/s]

Error for 1YCCA expected P at position 80 
Sequence is TEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHSGQAEGYSYTDANIKKNVLWDENNMSEYLTNPKKYIPGTKMAFGGLKKEKDRNDLITYLKKACE
Mapping is {'-5': '1', '-4': '2', '-3': '3', '-2': '4', '-1': '5', '1': '6', '2': '7', '3': '8', '4': '9', '5': '10', '6': '11', '7': '12', '8': '13', '9': '14', '10': '15', '11': '16', '12': '17', '13': '18', '14': '19', '15': '20', '16': '21', '17': '22', '18': '23', '19': '24', '20': '25', '21': '26', '22': '27', '23': '28', '24': '29', '25': '30', '26': '31', '27': '32', '28': '33', '29': '34', '30': '35', '31': '36', '32': '37', '33': '38', '34': '39', '35': '40', '36': '41', '37': '42', '38': '43', '39': '44', '40': '45', '41': '46', '42': '47', '43': '48', '44': '49', '45': '50', '46': '51', '47': '52', '48': '53', '49': '54', '50': '55', '51': '56', '52': '57', '53': '58', '54': '59', '55': '60', '56': '61', '57': '62', '58': '63', '59': '64', '60': '65', '61': '66', '62': '67', '63': '68', '64': '69', '65': '70', '6

 74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                         | 1023/1387 [01:00<00:17, 20.68it/s]

Error for 1BNIA expected D at position 10 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                         | 1026/1387 [01:00<00:16, 22.12it/s]

Error for 1BNIA expected D at position 6 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', '

 74%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                        | 1032/1387 [01:00<00:19, 18.40it/s]

Error for 1BNIA expected E at position 27 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                        | 1035/1387 [01:01<00:17, 20.28it/s]

Error for 1BNIA expected G at position 50 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                       | 1041/1387 [01:01<00:19, 17.39it/s]

Error for 1BNIA expected G at position 63 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                       | 1044/1387 [01:01<00:17, 19.15it/s]

Error for 1BNIA expected I at position 23 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                      | 1050/1387 [01:02<00:19, 17.02it/s]

Error for 1BNIA expected I at position 53 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 76%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                     | 1056/1387 [01:02<00:15, 21.07it/s]

Error for 1BNIA expected I at position 86 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                     | 1062/1387 [01:02<00:18, 17.79it/s]

Error for 1BNIA expected I at position 94 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                    | 1065/1387 [01:02<00:16, 19.88it/s]

Error for 1BNIA expected K at position 60 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 77%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                    | 1071/1387 [01:03<00:18, 17.29it/s]

Error for 1BNIA expected L at position 87 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 77%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                   | 1074/1387 [01:03<00:16, 19.24it/s]

Error for 1BNIA expected N at position 56 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                   | 1080/1387 [01:03<00:17, 17.08it/s]

Error for 1BNIA expected N at position 82 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                  | 1086/1387 [01:03<00:14, 20.64it/s]

Error for 1BNIA expected Q at position 29 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 79%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                 | 1092/1387 [01:04<00:16, 17.67it/s]

Error for 1BNIA expected R at position 67 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 79%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                 | 1095/1387 [01:04<00:14, 19.53it/s]

Error for 1BNIA expected S at position 89 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                | 1101/1387 [01:04<00:16, 17.11it/s]

Error for 1BNIA expected T at position 103 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70',

 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                | 1104/1387 [01:05<00:14, 19.33it/s]

Error for 1BNIA expected T at position 24 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                               | 1110/1387 [01:05<00:16, 16.91it/s]

Error for 1BNIA expected T at position 4 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', '

 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                              | 1116/1387 [01:05<00:13, 20.50it/s]

Error for 1BNIA expected T at position 77 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 81%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                              | 1122/1387 [01:06<00:15, 17.43it/s]

Error for 1BNIA expected V at position 34 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                             | 1125/1387 [01:06<00:13, 19.32it/s]

Error for 1BNIA expected Y at position 11 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                             | 1131/1387 [01:06<00:14, 17.10it/s]

Error for 1BNIA expected Y at position 76 
Sequence is VINTFDGVADYLQTYHKLPDNYITKSEAQALGWVASKGNLADVAPGKSIGGDIFSNREGKLPGKSGRTWREADINYTSGFRNSDRILYSSDWLIYKTTDHYQTFTKIR
Mapping is {'3': '1', '4': '2', '5': '3', '6': '4', '7': '5', '8': '6', '9': '7', '10': '8', '11': '9', '12': '10', '13': '11', '14': '12', '15': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', 

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1387/1387 [01:20<00:00, 17.23it/s]


In [89]:
pd.DataFrame({'wt_seq': wt, 
              'mut_seq': mut ,
              'ddg': ddg, 
              'pdb_id': pdb_ids, 
              'mut_info': mut_infos,
              'pos': poss}).to_csv('DATASETS/ACDC_varibench.csv')

# DeepDDG train

In [92]:
df_deepddg_train = pd.read_csv('DATA/deep_ddg_train.csv', sep = ';')

In [94]:
print('Total dataset length', len(df_deepddg_train))
pdb_ids = list(set([t.split()[0].upper() for t in df_deepddg_train['PDB ID with modifications to be made'].to_list()]))
print('Total number of different chains in dataset', len(pdb_ids))

Total dataset length 5444
Total number of different chains in dataset 209


In [95]:
for pdb_id in pdb_ids:
    if not os.path.isfile(f"PDB/{pdb_id[:4]}.pdb"):
        with open(f"PDB/{pdb_id[:4]}.pdb", "w") as fh:
            fh.write(pdb_client.get_pdb_file(f"{pdb_id[:4]}", compression=False))

Sending GET request to https://files.rcsb.org/download/1O1U.pdb to fetch 1O1U's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1BF4.pdb to fetch 1BF4's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1G6P.pdb to fetch 1G6P's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/4GWT.pdb to fetch 4GWT's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1BKS.pdb to fetch 1BKS's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1GLM.pdb to fetch 1GLM's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1JY0.pdb to fetch 1JY0's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1NFI.pdb to fetch 1NFI's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/2RPN.pdb to fetch 2RPN's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/2K3K.pdb to fetch 2K3K's pd

Sending GET request to https://files.rcsb.org/download/5C0Z.pdb to fetch 5C0Z's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/4Q0M.pdb to fetch 4Q0M's pdb file as a string.


In [None]:
wt = []
mut = []
ddg = []
pdb_ids = []
mut_infos = []
poss = []

#no_verbatim_pdb_ids = {'1C9OA', '1VQBA'}


print('Processing DeepDDG train')

for idx in tqdm(range(len(df_deepddg_train))):
    pdb_id = df_deepddg_train.iloc[idx]['PDB ID with modifications to be made'].upper()
    wild_aa = df_deepddg_train.iloc[idx]['Mutation'][0]
    pos = df_deepddg_train.iloc[idx]['Mutation'][2:-2]
    mutant_aa = df_deepddg_train.iloc[idx]['Mutation'][-1]
    exp_ddg = df_deepddg_train.iloc[idx]['ΔΔG (kcal/mol) positive is stable']
    
    #if pdb_id!= '1CLWA':
    #    continue
        
    
    pdb = PDBParser().get_structure("pdb_id[:4]", f'PDB/{pdb_id[:4]}.pdb')
    chain = next(pdb.get_chains()).get_id()
        
    _, _, sequence, pdb2seq_pos, seq2pdb_pos = pdb2info(f'PDB/{pdb_id[:4]}.pdb', chain)
    
    #if pdb_id not in no_verbatim_pdb_ids:
    #    seq2pdb_pos = {str(i):str(i) for i in range(len(sequence))}
    
    if pos not in seq2pdb_pos:
        print(f'Indexing error for {pdb_id} position {pos} not present in mapping {seq2pdb_pos}')
        
    else:
        if sequence[int(seq2pdb_pos[pos])-1]!=wild_aa:
            print(f'Error for {pdb_id} expected {wild_aa} at position {pos} ')
            print(f'Sequence is {sequence}')
            print(f'Mapping is {seq2pdb_pos}')
        
        else:
            wt.append(sequence)
            tt = list(sequence)
            tt[int(seq2pdb_pos[pos])-1] = mutant_aa
            poss.append(int(seq2pdb_pos[pos])-1)
            mut.append(''.join(tt))
            ddg.append(exp_ddg)
            pdb_ids.append(pdb_id)
            mut_infos.append(str(wild_aa) + pos + str(mutant_aa))

Processing DeepDDG train








Error for 1ACB:I:F10W expected V at position 18 
Sequence is CGVPAIQPVLSGLIVNGEEAVPGSWPWQVSLQDKTGFHFCGGSLINENWVVTAAHCGVTTSDVVVAGEFDQGSSSEKIQKLKIAKVFKNSKYNSLTINNDITLLKLSTAASFSQTVSAVCLPSASDDFAAGTTCVTTGWGLTRYANTPDRLQQASLPLLSNTNCKKYWGTKIKDAMICAGASGVSSCMGDSGGPLVCKKNGAWTLVGIVSWGSSTCSTSTPGVYARVTALVNWVQQTLAAN
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '

  1%|█▎                                                                                                                                                              | 46/5444 [00:10<07:44, 11.62it/s]

Error for 1ACB:I:F10W expected V at position 34 
Sequence is CGVPAIQPVLSGLIVNGEEAVPGSWPWQVSLQDKTGFHFCGGSLINENWVVTAAHCGVTTSDVVVAGEFDQGSSSEKIQKLKIAKVFKNSKYNSLTINNDITLLKLSTAASFSQTVSAVCLPSASDDFAAGTTCVTTGWGLTRYANTPDRLQQASLPLLSNTNCKKYWGTKIKDAMICAGASGVSSCMGDSGGPLVCKKNGAWTLVGIVSWGSSTCSTSTPGVYARVTALVNWVQQTLAAN
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '



Error for 1ACB:I:F10W expected V at position 54 
Sequence is CGVPAIQPVLSGLIVNGEEAVPGSWPWQVSLQDKTGFHFCGGSLINENWVVTAAHCGVTTSDVVVAGEFDQGSSSEKIQKLKIAKVFKNSKYNSLTINNDITLLKLSTAASFSQTVSAVCLPSASDDFAAGTTCVTTGWGLTRYANTPDRLQQASLPLLSNTNCKKYWGTKIKDAMICAGASGVSSCMGDSGGPLVCKKNGAWTLVGIVSWGSSTCSTSTPGVYARVTALVNWVQQTLAAN
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '

  1%|█▍                                                                                                                                                              | 50/5444 [00:10<09:09,  9.82it/s]

Error for 1ACB:I:F10W expected V at position 18 
Sequence is CGVPAIQPVLSGLIVNGEEAVPGSWPWQVSLQDKTGFHFCGGSLINENWVVTAAHCGVTTSDVVVAGEFDQGSSSEKIQKLKIAKVFKNSKYNSLTINNDITLLKLSTAASFSQTVSAVCLPSASDDFAAGTTCVTTGWGLTRYANTPDRLQQASLPLLSNTNCKKYWGTKIKDAMICAGASGVSSCMGDSGGPLVCKKNGAWTLVGIVSWGSSTCSTSTPGVYARVTALVNWVQQTLAAN
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '

  1%|█▌                                                                                                                                                              | 54/5444 [00:10<07:43, 11.62it/s]

Error for 1ACB:I:F10W expected V at position 62 
Sequence is CGVPAIQPVLSGLIVNGEEAVPGSWPWQVSLQDKTGFHFCGGSLINENWVVTAAHCGVTTSDVVVAGEFDQGSSSEKIQKLKIAKVFKNSKYNSLTINNDITLLKLSTAASFSQTVSAVCLPSASDDFAAGTTCVTTGWGLTRYANTPDRLQQASLPLLSNTNCKKYWGTKIKDAMICAGASGVSSCMGDSGGPLVCKKNGAWTLVGIVSWGSSTCSTSTPGVYARVTALVNWVQQTLAAN
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '



Indexing error for 1ACB:I:F10W position 14 not present in mapping {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', '73': '71', '74': '72', '75': '73', '76': '74', '77': '75', '78': '76', '79': '77', '80': '78', '81': '79', '82



Indexing error for 1ACB:I:F10W position 14 not present in mapping {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '59', '62': '60', '63': '61', '64': '62', '65': '63', '66': '64', '67': '65', '68': '66', '69': '67', '70': '68', '71': '69', '72': '70', '73': '71', '74': '72', '75': '73', '76': '74', '77': '75', '78': '76', '79': '77', '80': '78', '81': '79', '82

  1%|█▊                                                                                                                                                              | 62/5444 [00:11<08:59,  9.98it/s]

Error for 1ACB:I:F10W expected V at position 54 
Sequence is CGVPAIQPVLSGLIVNGEEAVPGSWPWQVSLQDKTGFHFCGGSLINENWVVTAAHCGVTTSDVVVAGEFDQGSSSEKIQKLKIAKVFKNSKYNSLTINNDITLLKLSTAASFSQTVSAVCLPSASDDFAAGTTCVTTGWGLTRYANTPDRLQQASLPLLSNTNCKKYWGTKIKDAMICAGASGVSSCMGDSGGPLVCKKNGAWTLVGIVSWGSSTCSTSTPGVYARVTALVNWVQQTLAAN
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '16': '14', '17': '15', '18': '16', '19': '17', '20': '18', '21': '19', '22': '20', '23': '21', '24': '22', '25': '23', '26': '24', '27': '25', '28': '26', '29': '27', '30': '28', '31': '29', '32': '30', '33': '31', '34': '32', '35': '33', '36': '34', '37': '35', '38': '36', '39': '37', '40': '38', '41': '39', '42': '40', '43': '41', '44': '42', '45': '43', '46': '44', '47': '45', '48': '46', '49': '47', '50': '48', '51': '49', '52': '50', '53': '51', '54': '52', '55': '53', '56': '54', '57': '55', '58': '56', '59': '57', '60': '58', '61': '





  5%|████████▍                                                                                                                                                      | 288/5444 [01:24<03:55, 21.87it/s]

Indexing error for 1AZP position 30 not present in mapping {}




Indexing error for 1BF4:Y34W position 45 not present in mapping {}
Indexing error for 1BF4:Y34W position 51 not present in mapping {}
Indexing error for 1BF4:Y34W position 35 not present in mapping {}
Indexing error for 1BF4:Y34W position 12 not present in mapping {}
Indexing error for 1BF4:Y34W position 32 not present in mapping {}
Indexing error for 1BF4:Y34W position 27 not present in mapping {}




Indexing error for 1BF4:Y34W position 37 not present in mapping {}
Indexing error for 1BF4:Y34W position 17 not present in mapping {}
Indexing error for 1BF4:Y34W position 30 not present in mapping {}
Indexing error for 1BF4:Y34W position 7 not present in mapping {}
Indexing error for 1BF4:Y34W position 55 not present in mapping {}
Indexing error for 1BF4:Y34W position 56 not present in mapping {}




Indexing error for 1BF4:Y34W position 59 not present in mapping {}
Indexing error for 1BF4:Y34W position 57 not present in mapping {}
Indexing error for 1BF4:Y34W position 25 not present in mapping {}
Indexing error for 1BF4:Y34W position 43 not present in mapping {}
Indexing error for 1BF4:Y34W position 18 not present in mapping {}


  6%|█████████                                                                                                                                                      | 311/5444 [01:30<07:11, 11.90it/s]

Indexing error for 1BF4:Y34W position 31 not present in mapping {}
Indexing error for 1BF4:Y34W position 47 not present in mapping {}




Indexing error for 1BF4:Y34W position 41 not present in mapping {}
Indexing error for 1BF4:Y34W position 15 not present in mapping {}
Indexing error for 1BF4:Y34W position 23 not present in mapping {}
Indexing error for 1BF4:Y34W position 46 not present in mapping {}
Indexing error for 1BF4:Y34W position 4 not present in mapping {}


  6%|█████████▎                                                                                                                                                     | 317/5444 [01:32<24:25,  3.50it/s]

In [103]:
pd.DataFrame({'wt_seq': wt, 
              'mut_seq': mut ,
              'ddg': ddg, 
              'pdb_id': pdb_ids, 
              'mut_info': mut_infos,
              'pos': poss}).to_csv('DATASETS/deepddg_train.csv')

# DeepDDG test

In [104]:
df_deepddg_test = pd.read_csv('DATA/deep_ddg_test.csv', sep = ';')

In [105]:
print('Total dataset length', len(df_deepddg_test))
pdb_ids = list(set([t.split()[0].upper() for t in df_deepddg_test['PDB ID with modifications to be made'].to_list()]))
print('Total number of different chains in dataset', len(pdb_ids))

Total dataset length 276
Total number of different chains in dataset 37


In [106]:
for pdb_id in pdb_ids:
    if not os.path.isfile(f"PDB/{pdb_id[:4]}.pdb"):
        with open(f"PDB/{pdb_id[:4]}.pdb", "w") as fh:
            fh.write(pdb_client.get_pdb_file(f"{pdb_id[:4]}", compression=False))

Sending GET request to https://files.rcsb.org/download/1G3P.pdb to fetch 1G3P's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1GUA.pdb to fetch 1GUA's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1O6X.pdb to fetch 1O6X's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1HCQ.pdb to fetch 1HCQ's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/3FFN.pdb to fetch 3FFN's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1IV7.pdb to fetch 1IV7's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/4HE7.pdb to fetch 4HE7's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1J8I.pdb to fetch 1J8I's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1JL9.pdb to fetch 1JL9's pdb file as a string.
Sending GET request to https://files.rcsb.org/download/1E0L.pdb to fetch 1E0L's pd

In [107]:
wt = []
mut = []
ddg = []
pdb_ids = []
mut_infos = []
poss = []

#no_verbatim_pdb_ids = {'1C9OA', '1VQBA'}


print('Processing DeepDDG test')

for idx in tqdm(range(len(df_deepddg_test))):
    pdb_id = df_deepddg_test.iloc[idx]['PDB ID with modifications to be made'].upper()
    wild_aa = df_deepddg_test.iloc[idx]['Mutation'][0]
    pos = df_deepddg_test.iloc[idx]['Mutation'][2:-2]
    mutant_aa = df_deepddg_test.iloc[idx]['Mutation'][-1]
    exp_ddg = df_deepddg_test.iloc[idx]['ΔΔG (kcal/mol) positive is stable']
    
    #if pdb_id!= '1CLWA':
    #    continue
        
    
    pdb = PDBParser().get_structure("pdb_id[:4]", f'PDB/{pdb_id[:4]}.pdb')
    chain = next(pdb.get_chains()).get_id()
        
    _, _, sequence, pdb2seq_pos, seq2pdb_pos = pdb2info(f'PDB/{pdb_id[:4]}.pdb', chain)
    
    #if pdb_id not in no_verbatim_pdb_ids:
    #    seq2pdb_pos = {str(i):str(i) for i in range(len(sequence))}
    
    if pos not in seq2pdb_pos:
        print(f'Indexing error for {pdb_id} position {pos} not present in mapping {seq2pdb_pos}')
        
    else:
        if sequence[int(seq2pdb_pos[pos])-1]!=wild_aa:
            print(f'Error for {pdb_id} expected {wild_aa} at position {pos} ')
            print(f'Sequence is {sequence}')
            print(f'Mapping is {seq2pdb_pos}')
        
        else:
            wt.append(sequence)
            tt = list(sequence)
            tt[int(seq2pdb_pos[pos])-1] = mutant_aa
            poss.append(int(seq2pdb_pos[pos])-1)
            mut.append(''.join(tt))
            ddg.append(exp_ddg)
            pdb_ids.append(pdb_id)
            mut_infos.append(str(wild_aa) + pos + str(mutant_aa))

Processing DeepDDG test




Indexing error for 1GLU position 500 not present in mapping {}
Error for 1GUA expected L at position 82 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '

 34%|██████████████████████████████████████████████████████▎                                                                                                          | 93/276 [00:12<00:15, 11.69it/s]

Error for 1GUA expected L at position 82 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'



Error for 1GUA expected I at position 58 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'

 36%|█████████████████████████████████████████████████████████▊                                                                                                       | 99/276 [00:13<00:15, 11.26it/s]

Error for 1GUA expected R at position 59 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'

 37%|███████████████████████████████████████████████████████████▋                                                                                                    | 103/276 [00:13<00:13, 13.17it/s]

Error for 1GUA expected N at position 64 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'

 39%|██████████████████████████████████████████████████████████████                                                                                                  | 107/276 [00:14<00:14, 11.59it/s]

Error for 1GUA expected V at position 69 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'



Error for 1GUA expected M at position 76 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'

 41%|█████████████████████████████████████████████████████████████████▌                                                                                              | 113/276 [00:14<00:14, 11.16it/s]

Error for 1GUA expected S at position 77 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'



Error for 1GUA expected C at position 81 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'

 43%|████████████████████████████████████████████████████████████████████▉                                                                                           | 119/276 [00:15<00:13, 11.22it/s]

Error for 1GUA expected A at position 85 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'



Error for 1GUA expected P at position 93 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'

 45%|████████████████████████████████████████████████████████████████████████▍                                                                                       | 125/276 [00:15<00:13, 11.58it/s]

Error for 1GUA expected C at position 96 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'



Error for 1GUA expected V at position 98 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66'

 47%|██████████████████████████████████████████████████████████████████████████▊                                                                                     | 129/276 [00:16<00:14, 10.16it/s]

Error for 1GUA expected E at position 104 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66

 48%|█████████████████████████████████████████████████████████████████████████████                                                                                   | 133/276 [00:16<00:11, 12.27it/s]

Error for 1GUA expected D at position 117 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66

 50%|███████████████████████████████████████████████████████████████████████████████▍                                                                                | 137/276 [00:16<00:12, 11.37it/s]

Error for 1GUA expected L at position 121 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66



Error for 1GUA expected V at position 128 
Sequence is MREYKLVVLGSGGVGKSALTVQFVQGIFVDEYDPTIEDSYRKQVEVDCQQCMLEILDTAGTEQFTAMRDLYMKNGQGFALVYSITAQSTFNDLQDLREQILRVKDTEDVPMILVGNKCDLEDERVVGKEQGQNLARQWCNCAFLESSAKSKINVNEIFYDLVRQINR
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66

 51%|██████████████████████████████████████████████████████████████████████████████████▎                                                                             | 142/276 [00:17<00:12, 11.12it/s]

Indexing error for 1HCQ position 67 not present in mapping {}
Indexing error for 1HCQ position 67 not present in mapping {}
Indexing error for 1IV7 position 141 not present in mapping {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66': '66', '67': '67', '68': '68', '69': '69', '70':



Indexing error for 1IV7 position 119 not present in mapping {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66': '66', '67': '67', '68': '68', '69': '69', '70': '70', '71': '71', '72': '72', '73': '73', '74': '74', '75': '75', '76': '76', '77': '77', '78': '78', '79': '79', '80': '80



Indexing error for 1IV7 position 189 not present in mapping {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57': '57', '58': '58', '59': '59', '60': '60', '61': '61', '62': '62', '63': '63', '64': '64', '65': '65', '66': '66', '67': '67', '68': '68', '69': '69', '70': '70', '71': '71', '72': '72', '73': '73', '74': '74', '75': '75', '76': '76', '77': '77', '78': '78', '79': '79', '80': '80





 72%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                             | 198/276 [00:43<00:18,  4.21it/s]

Error for 2CLR expected H at position 13 
Sequence is GSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQRTDAPKTHMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGQEQRYTCHVQHEGLPKPLTLRWE
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57'

 72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                            | 200/276 [00:43<00:17,  4.29it/s]

Error for 2CLR expected H at position 51 
Sequence is GSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQRTDAPKTHMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGQEQRYTCHVQHEGLPKPLTLRWE
Mapping is {'1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', '10': '10', '11': '11', '12': '12', '13': '13', '14': '14', '15': '15', '16': '16', '17': '17', '18': '18', '19': '19', '20': '20', '21': '21', '22': '22', '23': '23', '24': '24', '25': '25', '26': '26', '27': '27', '28': '28', '29': '29', '30': '30', '31': '31', '32': '32', '33': '33', '34': '34', '35': '35', '36': '36', '37': '37', '38': '38', '39': '39', '40': '40', '41': '41', '42': '42', '43': '43', '44': '44', '45': '45', '46': '46', '47': '47', '48': '48', '49': '49', '50': '50', '51': '51', '52': '52', '53': '53', '54': '54', '55': '55', '56': '56', '57'







100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 276/276 [00:55<00:00,  4.94it/s]


In [108]:
pd.DataFrame({'wt_seq': wt, 
              'mut_seq': mut ,
              'ddg': ddg, 
              'pdb_id': pdb_ids, 
              'mut_info': mut_infos,
              'pos': poss}).to_csv('DATASETS/deepddg_test.csv')

# Ssym

In [132]:
df_ssym = pd.read_csv('DATA/s_sym.txt', sep= ' ', names = ['PDB_ID', '_', 'POSITION', 'WILD_RES', 'MUTANT_RES', 'EXP_DDG'])

In [133]:
print('Total dataset length', len(df_ssym))
pdb_ids = list(set([t.split()[0].upper() for t in df_ssym['PDB_ID'].to_list()]))
print('Total number of different chains in dataset', len(pdb_ids))

Total dataset length 342
Total number of different chains in dataset 15


In [134]:
for pdb_id in pdb_ids:
    if not os.path.isfile(f"PDB/{pdb_id[:4]}.pdb"):
        with open(f"PDB/{pdb_id[:4]}.pdb", "w") as fh:
            fh.write(pdb_client.get_pdb_file(f"{pdb_id[:4]}", compression=False))

In [135]:
wt = []
mut = []
ddg = []
pdb_ids = []
mut_infos = []
poss = []

print('Processing Ssym')

for idx in tqdm(range(len(df_ssym))):
    pdb_id = df_ssym.iloc[idx]['PDB_ID'].upper()
    wild_aa = df_ssym.iloc[idx]['WILD_RES']
    pos = str(df_ssym.iloc[idx]['POSITION'])
    mutant_aa = df_ssym.iloc[idx]['MUTANT_RES']
    exp_ddg = df_ssym.iloc[idx]['EXP_DDG']
    
        
    _, _, sequence, pdb2seq_pos, seq2pdb_pos = pdb2info(f'PDB/{pdb_id[:4]}.pdb', pdb_id[-1])
    
    #if pdb_id not in no_verbatim_pdb_ids:
    #  seq2pdb_pos = {str(i):str(i) for i in range(len(sequence))}
    
    if pos not in seq2pdb_pos:
        print(f'Indexing error for {pdb_id} position {pos} not present in mapping {seq2pdb_pos}')
        
    else:
        if sequence[int(seq2pdb_pos[pos])-1]!=wild_aa:
            print(f'Error for {pdb_id} expected {wild_aa} at position {pos} ')
            print(f'Sequence is {sequence}')
            print(f'Mapping is {seq2pdb_pos}')
        
        else:
            wt.append(sequence)
            tt = list(sequence)
            tt[int(seq2pdb_pos[pos])-1] = mutant_aa
            poss.append(int(seq2pdb_pos[pos])-1)
            mut.append(''.join(tt))
            ddg.append(exp_ddg)
            pdb_ids.append(pdb_id)
            mut_infos.append(str(wild_aa) + pos + str(mutant_aa))

Processing Ssym


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 342/342 [00:14<00:00, 23.49it/s]


In [136]:
pd.DataFrame({'wt_seq': wt, 
              'mut_seq': mut ,
              'ddg': ddg, 
              'pdb_id': pdb_ids, 
              'mut_info': mut_infos,
              'pos': poss}).to_csv('DATASETS/ssym.csv')

In [137]:
pd.DataFrame({'wt_seq': mut, 
              'mut_seq': wt ,
              'ddg': [-t for t in ddg], 
              'pdb_id': pdb_ids, 
              'mut_info': [t[-1] + t[1:-1] + t[0] for t in mut_infos],
              'pos': poss}).to_csv('DATASETS/ssym_r.csv')

# Myoglobin

In [138]:
df_myoglobin = pd.read_csv('DATA/myoglobin.txt', sep= ' ', names = ['PDB_ID', 'POSITION', 'WILD_RES', 'MUTANT_RES', 'EXP_DDG'])

In [139]:
print('Total dataset length', len(df_myoglobin))
pdb_ids = list(set([t.split()[0].upper() for t in df_myoglobin['PDB_ID'].to_list()]))
print('Total number of different chains in dataset', len(pdb_ids))

Total dataset length 134
Total number of different chains in dataset 1


In [140]:
for pdb_id in pdb_ids:
    if not os.path.isfile(f"PDB/{pdb_id[:4]}.pdb"):
        with open(f"PDB/{pdb_id[:4]}.pdb", "w") as fh:
            fh.write(pdb_client.get_pdb_file(f"{pdb_id[:4]}", compression=False))

In [141]:
wt = []
mut = []
ddg = []
pdb_ids = []
mut_infos = []
poss = []

#no_verbatim_pdb_ids = {'1C9OA', '1VQBA'}


print('Processing myoglobin')

for idx in tqdm(range(len(df_myoglobin))):
    pdb_id = df_myoglobin.iloc[idx]['PDB_ID'].upper()
    wild_aa = df_myoglobin.iloc[idx]['WILD_RES']
    pos = str(df_myoglobin.iloc[idx]['POSITION'])
    mutant_aa = df_myoglobin.iloc[idx]['MUTANT_RES']
    exp_ddg = df_myoglobin.iloc[idx]['EXP_DDG']
    
    #if pdb_id!= '1CLWA':
    #    continue
        
    
    pdb = PDBParser().get_structure(pdb_id[:4], f'PDB/{pdb_id[:4]}.pdb')
    chain = next(pdb.get_chains()).get_id()
        
    _, _, sequence, pdb2seq_pos, seq2pdb_pos = pdb2info(f'PDB/{pdb_id[:4]}.pdb', pdb_id[-1])
    
    #if pdb_id not in no_verbatim_pdb_ids:
    #  seq2pdb_pos = {str(i):str(i) for i in range(len(sequence))}
    
    if pos not in seq2pdb_pos:
        print(f'Indexing error for {pdb_id} position {pos} not present in mapping {seq2pdb_pos}')
        
    else:
        if sequence[int(seq2pdb_pos[pos])-1]!=wild_aa:
            print(f'Error for {pdb_id} expected {wild_aa} at position {pos} ')
            print(f'Sequence is {sequence}')
            print(f'Mapping is {seq2pdb_pos}')
        
        else:
            wt.append(sequence)
            tt = list(sequence)
            tt[int(seq2pdb_pos[pos])-1] = mutant_aa
            poss.append(int(seq2pdb_pos[pos])-1)
            mut.append(''.join(tt))
            ddg.append(exp_ddg)
            pdb_ids.append(pdb_id)
            mut_infos.append(str(wild_aa) + pos + str(mutant_aa))

Processing myoglobin


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 134/134 [00:14<00:00,  9.33it/s]


In [142]:
pd.DataFrame({'wt_seq': wt, 
              'mut_seq': mut ,
              'ddg': ddg, 
              'pdb_id': pdb_ids, 
              'mut_info': mut_infos,
              'pos': poss}).to_csv('DATASETS/myoglobin.csv')

In [143]:
pd.DataFrame({'wt_seq': mut, 
              'mut_seq': wt ,
              'ddg': [-t for t in ddg], 
              'pdb_id': pdb_ids, 
              'mut_info': [t[-1] + t[1:-1] + t[0] for t in mut_infos],
              'pos': poss}).to_csv('DATASETS/myoglobin_r.csv')

# P53

In [145]:
df_p53 = pd.read_csv('DATA/p53.txt', sep= ' ', names = ['PDB_ID', 'POSITION', 'WILD_RES', 'MUTANT_RES', 'EXP_DDG'])

In [146]:
print('Total dataset length', len(df_p53))
pdb_ids = list(set([t.split()[0].upper() for t in df_p53['PDB_ID'].to_list()]))
print('Total number of different chains in dataset', len(pdb_ids))

Total dataset length 42
Total number of different chains in dataset 1


In [147]:
for pdb_id in pdb_ids:
    if not os.path.isfile(f"PDB/{pdb_id[:4]}.pdb"):
        with open(f"PDB/{pdb_id[:4]}.pdb", "w") as fh:
            fh.write(pdb_client.get_pdb_file(f"{pdb_id[:4]}", compression=False))

In [148]:
wt = []
mut = []
ddg = []
pdb_ids = []
mut_infos = []
poss = []

print('Processing p53')

for idx in tqdm(range(len(df_myoglobin))):
    pdb_id = df_myoglobin.iloc[idx]['PDB_ID'].upper()
    wild_aa = df_myoglobin.iloc[idx]['WILD_RES']
    pos = str(df_myoglobin.iloc[idx]['POSITION'])
    mutant_aa = df_myoglobin.iloc[idx]['MUTANT_RES']
    exp_ddg = df_myoglobin.iloc[idx]['EXP_DDG']
    
        
    
    pdb = PDBParser().get_structure(pdb_id[:4], f'PDB/{pdb_id[:4]}.pdb')
    chain = next(pdb.get_chains()).get_id()
        
    _, _, sequence, pdb2seq_pos, seq2pdb_pos = pdb2info(f'PDB/{pdb_id[:4]}.pdb', pdb_id[-1])
    
    
    if pos not in seq2pdb_pos:
        print(f'Indexing error for {pdb_id} position {pos} not present in mapping {seq2pdb_pos}')
        
    else:
        if sequence[int(seq2pdb_pos[pos])-1]!=wild_aa:
            print(f'Error for {pdb_id} expected {wild_aa} at position {pos} ')
            print(f'Sequence is {sequence}')
            print(f'Mapping is {seq2pdb_pos}')
        
        else:
            wt.append(sequence)
            tt = list(sequence)
            tt[int(seq2pdb_pos[pos])-1] = mutant_aa
            poss.append(int(seq2pdb_pos[pos])-1)
            mut.append(''.join(tt))
            ddg.append(exp_ddg)
            pdb_ids.append(pdb_id)
            mut_infos.append(str(wild_aa) + pos + str(mutant_aa))

Processing p53


















100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [00:09<00:00,  4.48it/s]


In [149]:
pd.DataFrame({'wt_seq': wt, 
              'mut_seq': mut ,
              'ddg': ddg, 
              'pdb_id': pdb_ids, 
              'mut_info': mut_infos,
              'pos': poss}).to_csv('DATASETS/p53.csv')