In [1]:
import pandas as pd
import itertools

In [2]:
# Load the design table of each library from its Excel workbook.
# Later cells read the 'set_1', 'set_2' and 'type' columns of these frames.
CC_PEL = pd.read_excel(io='library_design/CC_PEL_design.xlsx')
CN_PEL = pd.read_excel(io='library_design/CN_PEL_design.xlsx')

# Mass assigned to every one-letter symbol that can occur in a peptide string.
# NOTE(review): the values look like monoisotopic residue masses in Da -- confirm
# against the table they were taken from.
# 'z' and 'H' have no three-letter name below; they only occur in the fixed
# 'KzH' suffix appended to every peptide in the mass-list cells.
monomer_masses = {
    'a': 85.0528,
    'b': 111.0684,
    'c': 113.0477,
    'd': 115.0633,
    'e': 125.0841,
    'f': 141.1151,
    'g': 153.1154,
    'h': 207.0895,
    'A': 71.0371,
    'F': 147.0684,
    'K': 128.09496,
    'L': 113.0841,
    'P': 97.0528,
    'S': 87.0320,
    'T': 101.0477,
    'V': 99.0684,
    'Y': 163.0633,
    'z': 59.0609,
    'H': 1.0078,
}

# One-letter symbol -> three-letter name; the empty symbol stands for 'none'.
one_letter_code = {
    'a': 'Abu',
    'b': 'Cpa',
    'c': 'Hyp',
    'd': 'Mox',
    'e': 'Cba',
    'f': 'Aoa',
    'g': 'Cha',
    'h': 'Dmf',
    'A': 'Ala',
    'F': 'Phe',
    'K': 'Lys',
    'L': 'Leu',
    'P': 'Pro',
    'S': 'Ser',
    'T': 'Thr',
    'V': 'Val',
    'Y': 'Tyr',
    '': 'none',
}

# Inverse lookup (three-letter name -> one-letter symbol) used for decoding.
transposed_one_letter_code = dict(zip(one_letter_code.values(), one_letter_code.keys()))

In [3]:
# Targeted mass list for the CC_PEL library

# Decode the three-letter names in both monomer columns to one-letter symbols
# (replace returns a new frame, so CC_PEL itself is left untouched), then join
# the two symbols of every row into the corresponding codon.
dfmass = (
    CC_PEL
    .replace({'set_1': transposed_one_letter_code})
    .replace({'set_2': transposed_one_letter_code})
    .assign(codon=lambda design: design['set_1'] + design['set_2'])
)

# Split the codons by the 'type' of their row; each group is one variable
# position in the combinatorial enumeration below.
codon_column = dfmass['codon']
row_type = dfmass['type']
linker_codons = codon_column[row_type == 'linker'].values
triBB_codons = codon_column[row_type == 'triBB'].values
CA_codons = codon_column[row_type == 'CA'].values
BA_codons = codon_column[row_type == 'BA'].values

# Enumerate every peptide from the N-terminus: fixed 'A', a BA codon, fixed 'K',
# a CA codon, fixed 'S', a triBB codon, a linker codon and the fixed 'KzH' tail.
# itertools.product varies the last position fastest, i.e. the same order as
# extending the list one position at a time.
positions = (['A'], BA_codons, ['K'], CA_codons, ['S'], triBB_codons, linker_codons, ['KzH'])
pepset = [''.join(str(part) for part in choice) for choice in itertools.product(*positions)]

# Peptide mass = sum of the per-symbol masses. The explicit left-to-right loop is
# kept on purpose: built-in sum() uses a different float algorithm on newer
# Python versions and could change the last bits of the result.
masslist = []
for peptide in pepset:
    running_total = 0
    for symbol in peptide:
        running_total += monomer_masses[symbol]
    masslist.append(running_total)

# Export the unique masses in charge state 3 for the targeted mass list:
# m/z = (M + z * 1.0073) / z, rounded to 4 decimals, duplicates dropped, ascending.
z3_charge = 3
proton_mass = 1.0073
unique_z3 = sorted({round((mass + z3_charge*proton_mass)/z3_charge, 4) for mass in masslist})

# Rows are (m/z, charge); the row index is written as the first CSV column, no header.
mz_table = pd.DataFrame([(mz, z3_charge) for mz in unique_z3])
mz_table.to_csv("CC_PEL_masses.csv", index=True, header=False)

# Report how many distinct z=3 m/z values the library collapses to
n_peptides = len(pepset)
n_unique = len(unique_z3)
percent_unique = round(n_unique/n_peptides*100, 1)
print('In the library of', n_peptides, 'peptides, there are',
      n_unique, 'unique masses for the z=3 charge state (', percent_unique, '%).')

In the library of 39168 peptides, there are 949 unique masses for the z=3 charge state ( 2.4 %).


In [4]:
# Targeted mass list for the CN_PEL library

# Decode the three-letter names in both monomer columns to one-letter symbols
# (replace returns a new frame, so CN_PEL itself is left untouched), then join
# the two symbols of every row into the corresponding codon.
dfmass = (
    CN_PEL
    .replace({'set_1': transposed_one_letter_code})
    .replace({'set_2': transposed_one_letter_code})
    .assign(codon=lambda design: design['set_1'] + design['set_2'])
)

# Split the codons by the 'type' of their row; each group is one variable
# position in the combinatorial enumeration below.
codon_column = dfmass['codon']
row_type = dfmass['type']
linker_codons = codon_column[row_type == 'linker'].values
triBB_codons = codon_column[row_type == 'triBB'].values
CA_codons = codon_column[row_type == 'CA'].values
aniline_codons = codon_column[row_type == 'aniline'].values

# Enumerate every peptide from the N-terminus: fixed 'V', an aniline codon,
# fixed 'K', a CA codon, fixed 'S', a triBB codon, a linker codon and the fixed
# 'KzH' tail. itertools.product varies the last position fastest, i.e. the same
# order as extending the list one position at a time.
positions = (['V'], aniline_codons, ['K'], CA_codons, ['S'], triBB_codons, linker_codons, ['KzH'])
pepset = [''.join(str(part) for part in choice) for choice in itertools.product(*positions)]

# Peptide mass = sum of the per-symbol masses. The explicit left-to-right loop is
# kept on purpose: built-in sum() uses a different float algorithm on newer
# Python versions and could change the last bits of the result.
masslist = []
for peptide in pepset:
    running_total = 0
    for symbol in peptide:
        running_total += monomer_masses[symbol]
    masslist.append(running_total)

# Export the unique masses in charge state 3 for the targeted mass list:
# m/z = (M + z * 1.0073) / z, rounded to 4 decimals, duplicates dropped, ascending.
z3_charge = 3
proton_mass = 1.0073
unique_z3 = sorted({round((mass + z3_charge*proton_mass)/z3_charge, 4) for mass in masslist})

# Rows are (m/z, charge); the row index is written as the first CSV column, no header.
mz_table = pd.DataFrame([(mz, z3_charge) for mz in unique_z3])
mz_table.to_csv("CN_PEL_masses.csv", index=True, header=False)

# Report how many distinct z=3 m/z values the library collapses to
n_peptides = len(pepset)
n_unique = len(unique_z3)
percent_unique = round(n_unique/n_peptides*100, 1)
print('In the library of', n_peptides, 'peptides, there are',
      n_unique, 'unique masses for the z=3 charge state (', percent_unique, '%).')

In the library of 42432 peptides, there are 1116 unique masses for the z=3 charge state ( 2.6 %).
