In [40]:
# Importing packages
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import string

# COBRA toolbox specific packages
from cobra import Model, Reaction, Metabolite
import cobra
from cobra.flux_analysis import (
    single_gene_deletion, single_reaction_deletion, double_gene_deletion,
    double_reaction_deletion)

In [75]:
# Loading mass spec. data into dataframe
df_AB_mass_spec_data = pd.read_csv("Acinetobacter_baumannii_mass_spec.csv")
df_AB_mass_spec_data_important_features = df_AB_mass_spec_data.loc[:, 'Mass':'Max intensity'] # extracting important features
# df_AB_mass_spec_data_important_features.head()

df_AB_mass_spec_minimal = df_AB_mass_spec_data_important_features[['Mass','Formula','Putative metabolite']] # Mass, formula, name
df_AB_mass_spec_minimal.head()

Unnamed: 0,Mass,Formula,Putative metabolite
0,189.063754,C7H11NO5,N-Acetyl-L-glutamate
1,147.052987,C5H9NO4,L-Glutamate
2,175.095627,C6H13N3O3,L-Citrulline
3,221.089958,C8H15NO6,N-Acetyl-D-glucosamine
4,259.045942,C6H14NO8P,D-Glucosamine 6-phosphate


In [76]:
# Loading Acinetobacter baumannii network reconstruction 'iCN718'
AB_model=cobra.io.load_json_model('iCN718.json');

In [5]:
# Writing function that calculates molar mass from chemical formula
def calc_mass_from_formula( formula ):
    
    # Typical mass of compounds --- assumes non-monoisotopic
    C = 12.0107 # +/- 0.0008
    H = 1.00794 # +/- 0.00001
    N = 14.0067 # +/- 0.0001
    O = 15.9994 # +/-
    P = 30.973762 # +/- 0.000002
    S =  32.065 # +/- 0.005

    # Initializing new string for first round of alteration (i.e., adding the '+')
    new_str = ''

    # Adding a '+' before every element
    for index in range (0, len(formula)):
        temp_str = formula[index]
        if temp_str.isalpha():
            new_str = new_str + '+' + temp_str 
        elif temp_str.isnumeric():
            new_str = new_str + temp_str
            
    # Removing the '+' at teh beginning of each string
    if new_str[0] == '+':
        str_formula_add = new_str[1:]

    # Initializing new string for second round of alteration (i.e., adding the '*')
    final_str_formula = ''

    # Adding a '*' after each element that is followed by a number
    for index in range(0,len(str_formula_add)):
        temp_str_1 = str_formula_add[index]
        if index != len(str_formula_add)-1:
            temp_str_2 = str_formula_add[index + 1]
        if temp_str_1.isalpha() and temp_str_2.isnumeric():
            final_str_formula = final_str_formula + temp_str_1 + '*'
        else: 
            final_str_formula = final_str_formula + temp_str_1

    # Evaludating string expression and returning molecular mass to user
    molecular_mass = eval(final_str_formula)    
    return molecular_mass



In [8]:
# Test case of function (output should be 189.1659 )
mass = calc_mass_from_formula('C7H11NO5')
print(mass)

189.16593999999998


In [33]:
# # Parsing text file to obtain lists of chemical names, chemical formulas, and monoisotopic molecular weights


# # Parser variables: 
# # COMMON-Name - chemical name {}
# # INCHI - InChI=1S/chemicalFormula ....
# # MONOISOTOPIC-MW - weight {} 

# compounds_unparsed = open('A_Baumannii_compounds.txt','r')

# chemical_names_parse = []
# chemical_formula_parse = []
# monoisotopic_MW_parse = []
# counter = 0
# while True:
#     text = compounds_unparsed.readline()
#     #print(text)
#     if 'COMMON-NAME - ' in text:
#         chemical_name_split = text.split(' - ')
#         chemical_names_parse.append(chemical_name_split[1]) 
#         track = 0
#     if '1S/' in text: # 1S/ found before every checmical formula; added the 'track' counter because 1S/ appears twice for some compounds
#         if track == 0:
#             chemical_formula_split = text.split('/')
#             chemical_formula_parse.append(chemical_formula_split[1])
#             track = 1 # Shuts this off until a new chemical name is found
#     if 'MONOISOTOPIC-MW -' in text:
#         monoisotopic_MW_split = text.split(' - ')
#         monoisotopic_MW_parse.append(monoisotopic_MW_split[1])
        
#     #conter = counter + 1
#     #if conter%20 == 0:
#         #print(counter)

In [89]:
# Parsing data A_Baumannii file containing compounds

with open('A_Baumannii_compounds.txt') as f:
    # counter = 0
    # common_name = []
    # inchi = []
    # mono_mw = []
    start = False
    compounds = []
    for line in f:
        if line.strip().split()[0] =='UNIQUE-ID':
            start = True
            compound = {'COMMON-NAME':'','INCHI':'', 'MONOISOTOPIC-MW':0}
        if line.strip() == '//':
            start = False
            compounds.append(list(compound.values()))
        if start:
            line = line.strip().split()
            if line[0] == 'COMMON-NAME':
                compound['COMMON-NAME'] = ''.join(line[2:])
            if line[0] == 'INCHI':
                compound['INCHI'] = line[2].split('/')[1]
            if line[0] == 'MONOISOTOPIC-MW':
                compound['MONOISOTOPIC-MW'] = line[2]
    print(len(compounds))                    
    print(compounds[0])

1216
['322.0123269261', 'C11H12Cl2N2O5', 'chloramphenicol']


In [100]:
compounds

[['322.0123269261', 'C11H12Cl2N2O5', 'chloramphenicol'],
 ['2081.1845825124',
  'C100H182N2O38P2',
  'KDO<SUB>2</SUB>-(palmitoleoyl)-lipidIV<SUB>A</SUB>'],
 ['43.0058136594', 'CHNO', 'cyanate'],
 ['152.0473441231', 'C8H8O3', '4-hydroxyphenylacetate'],
 ['1003.2200673676001',
  'C34H52N7O20P3S',
  '3-[(3a<i>S</i>,4<i>S</i>,5<i>R</i>,7a<i>S</i>)-5-hydroxy-7a-methyl-1-oxo-octahydro-1<i>H</i>-inden-4-yl]-3-oxopropanoyl-CoA'],
 ['1005.2357174318',
  'C34H54N7O20P3S',
  '3-[(3a<i>S</i>,4<i>S</i>,5<i>R</i>,7a<i>S</i>)-5-hydroxy-7a-methyl-1-oxo-octahydro-1<i>H</i>-inden-4-yl]-3-hydroxypropanoyl-CoA'],
 ['971.1574671108', 'C32H44N7O20P3S', 'benzoylsuccinyl-CoA'],
 ['973.1731171749999',
  'C32H46N7O20P3S',
  '2-carboxymethyl-3-hydroxyphenylpropionyl-CoA'],
 ['18.0105646863', 'H2O', 'H<sub>2</sub>O'],
 ['929.1469024244999', 'C30H42N7O19P3S', '4-hydroxybenzoyl-acetyl-CoA'],
 ['931.1625524887',
  'C30H44N7O19P3S',
  '3-(4-hydroxyphenyl)-3-hydroxy-propionyl-CoA'],
 ['43.989829244199996', 'CO2', 'CO<

In [88]:
# Create new dataframe for comparison
df_AB_mass_spec_mapping = df_AB_mass_spec_minimal.copy()
df_AB_mass_spec_mapping['mapped_MW'] = ''
df_AB_mass_spec_mapping['mapped_formula'] = ''
df_AB_mass_spec_mapping['mapped_metabs'] = ''

df_AB_mass_spec_mapping

df_AB_mass_spec_mapping.loc[1,'mapped_MW'] = 'test'
df_AB_mass_spec_mapping

Unnamed: 0,Mass,Formula,Putative metabolite,mapped_MW,mapped_formula,mapped_metabs
0,189.063754,C7H11NO5,N-Acetyl-L-glutamate,,,
1,147.052987,C5H9NO4,L-Glutamate,test,,
2,175.095627,C6H13N3O3,L-Citrulline,,,
3,221.089958,C8H15NO6,N-Acetyl-D-glucosamine,,,
4,259.045942,C6H14NO8P,D-Glucosamine 6-phosphate,,,
5,115.063317,C5H9NO2,L-Proline,,,
6,131.094586,C6H13NO2,L-Leucine,,,
7,117.078934,C5H11NO2,L-Valine,,,
8,105.042676,C3H7NO3,L-Serine,,,
9,384.122018,C14H20N6O5S,S-Adenosyl-L-homocysteine,,,


In [116]:
# List of candidates 
# ppm = mass error / exact mass * 10^6

counter = 0

for mass_measured in df_AB_mass_spec_mapping['Mass']:
    for compound in compounds:
        candidate_metabs_mass = []
        candidate_metabs_formula = []
        candidate_metabs_name = []
        
        candidate_mass = compound[0]
        candidate_formula = compound[1]
        candidate_name = compound[2]
        
        if float(candidate_mass) > 0:
            error_ppm =  (abs((float(candidate_mass) - mass_measured)) / float(candidate_mass)) * (10**6)
        
        if error_ppm < 25:
            candidate_metabs_mass.append(candidate_mass)
            candidate_metabs_formula.append(candidate_name)
            candidate_metabs_name.append(candidate_name)
            # print('got one') - verified to work
            
    candidate_metabs_mass_comb = ', '.join(map(str,candidate_metabs_mass))
    candidate_metabs_formula_comb = ', '.join(map(str,candidate_metabs_formula))
    candidate_metabs_name_comb = ', '.join(map(str,candidate_metabs_name))
    
    df_AB_mass_spec_mapping.loc[counter,'mapped_MW'] = candidate_metabs_mass_comb
    df_AB_mass_spec_mapping.loc[counter,'mapped_formula'] = candidate_metabs_formula_comb
    df_AB_mass_spec_mapping.loc[counter,'mapped_metabs'] = candidate_metabs_name_comb
    
    counter += 1



        

got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one
got one


In [115]:
df_AB_mass_spec_mapping

Unnamed: 0,Mass,Formula,Putative metabolite,mapped_MW,mapped_formula,mapped_metabs
0,189.063754,C7H11NO5,N-Acetyl-L-glutamate,,,
1,147.052987,C5H9NO4,L-Glutamate,,,
2,175.095627,C6H13N3O3,L-Citrulline,,,
3,221.089958,C8H15NO6,N-Acetyl-D-glucosamine,,,
4,259.045942,C6H14NO8P,D-Glucosamine 6-phosphate,,,
5,115.063317,C5H9NO2,L-Proline,,,
6,131.094586,C6H13NO2,L-Leucine,,,
7,117.078934,C5H11NO2,L-Valine,,,
8,105.042676,C3H7NO3,L-Serine,,,
9,384.122018,C14H20N6O5S,S-Adenosyl-L-homocysteine,,,
