In [1]:
import copy
import json
import re

import numpy as np
import pandas as pd

# Description

This notebook can be used to prepare filter lists by calculating exact masses from chemical formulas.
Here it is used to calculate the exact mass of known compounds and their labelled versions and add it to their tables.

In [2]:
def formula2mass(symbol2mass, isotope2mass, formula):
    '''
    Given a whitespace separated chemical formula, calculate the exact mass.
    Isotopes deviating from the naturally most abundant are indicated by 
    their nominal (integer) mass enclodes by brackets.
    E.g. 13C labelled glucose is: [13]C6 H12 O6 
    A mix of labelled and unlabelled is written separated e.g.
    glucose with half of carbons labelled is: C3 [13]C3 H12 O6 
    '''
    total_mass = 0
    for element in formula.split():
        for symbol in symbol2mass.keys():
            if symbol in element and '[' != element[0]:
                split_element = element.split(symbol)
                if split_element[1] != '':
                    N = split_element[1]
                else:
                    N = 1
                total_mass += float(N) * symbol2mass[symbol]['Mass']

            elif symbol in element and '[' == element[0]:
                end = element.index(']')
                isotope = element[1:end]
                element = element[(end+1):]

                split_element = element.split(symbol)
                if split_element[1] != '':
                    N = split_element[1]
                else:
                    N = 1
                total_mass += float(N) * isotope2mass[symbol][isotope]

    return(total_mass)

### Read exact mass table

In [3]:
# Read the exact mass of each element and the isotopes:
mass_df = pd.read_csv('element_masses.tab', sep='\t', comment='#')

# Convert this into dictionaries usefull for
# converting a chemical formula into a mass:
symbol2mass = dict()
isotope2mass = dict()
for s, a, m in zip(mass_df['Symbol'], mass_df['Abundance'], mass_df['Mass']):
    ei = s.index('(')
    ss = s[:ei]
    
    if ss in symbol2mass and symbol2mass[ss]['Abundance'] < a:
        symbol2mass[ss]['Mass'] = m
        symbol2mass[ss]['Abundance'] = a
    elif ss not in symbol2mass:
        symbol2mass[ss] = {
            'Mass': m,
            'Abundance': a        
        }
    N = str(round(m))
    if ss not in isotope2mass:
        isotope2mass[ss] = {N: m}
    else:
        isotope2mass[ss][N] = m

### Add a mass to a table of compounds
Given a tab separated table with a "Formula" column, calculate the exact mass and add it as a new column named "Mass".
If the "Mass" column already exists it will be overwritten.
The old tab separated table file is then overwritten with a new one containing the "Mass" column.

In [4]:
# Read known Gln labelled compounds and add mass column:
lab_df = pd.read_csv('known_gln_labeled.tab', sep='\t')
lab_df['Mass'] = [formula2mass(symbol2mass, isotope2mass, f) for f in lab_df['Formula']]
lab_df.to_csv('known_gln_labeled.tab', sep='\t', index=False)

# Read known Cys labelled compounds and add mass column:
lab_df = pd.read_csv('known_cys_labeled.tab', sep='\t')
lab_df['Mass'] = [formula2mass(symbol2mass, isotope2mass, f) for f in lab_df['Formula']]
lab_df.to_csv('known_cys_labeled.tab', sep='\t', index=False)