In [40]:
# Importing packages
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import string

# COBRA toolbox specific packages
from cobra import Model, Reaction, Metabolite
import cobra
from cobra.flux_analysis import (
    single_gene_deletion, single_reaction_deletion, double_gene_deletion,
    double_reaction_deletion)

In [52]:
# Loading mass spec. data into dataframe
df_AB_mass_spec_data = pd.read_csv("Acinetobacter_baumannii_mass_spec.csv")
df_AB_mass_spec_data_important_features = df_AB_mass_spec_data.loc[:, 'Mass':'Max intensity'] # extracting important features
# df_AB_mass_spec_data_important_features.head()

df_AB_mass_spec_minimal = df_AB_mass_spec_data_important_features[['Mass','Formula','Putative metabolite']] # Mass, formula, name
df_AB_mass_spec_minimal.head()

Unnamed: 0,Mass,Formula,Putative metabolite
0,189.063754,C7H11NO5,N-Acetyl-L-glutamate
1,147.052987,C5H9NO4,L-Glutamate
2,175.095627,C6H13N3O3,L-Citrulline
3,221.089958,C8H15NO6,N-Acetyl-D-glucosamine
4,259.045942,C6H14NO8P,D-Glucosamine 6-phosphate


In [4]:
# Loading Acinetobacter baumannii network reconstruction 'iCN718'
AB_model=cobra.io.load_json_model('iCN718.json');

In [5]:
# Writing function that calculates molar mass from chemical formula
def calc_mass_from_formula( formula ):
    
    # Typical mass of compounds --- assumes non-monoisotopic
    C = 12.0107 # +/- 0.0008
    H = 1.00794 # +/- 0.00001
    N = 14.0067 # +/- 0.0001
    O = 15.9994 # +/-
    P = 30.973762 # +/- 0.000002
    S =  32.065 # +/- 0.005

    # Initializing new string for first round of alteration (i.e., adding the '+')
    new_str = ''

    # Adding a '+' before every element
    for index in range (0, len(formula)):
        temp_str = formula[index]
        if temp_str.isalpha():
            new_str = new_str + '+' + temp_str 
        elif temp_str.isnumeric():
            new_str = new_str + temp_str
            
    # Removing the '+' at teh beginning of each string
    if new_str[0] == '+':
        str_formula_add = new_str[1:]

    # Initializing new string for second round of alteration (i.e., adding the '*')
    final_str_formula = ''

    # Adding a '*' after each element that is followed by a number
    for index in range(0,len(str_formula_add)):
        temp_str_1 = str_formula_add[index]
        if index != len(str_formula_add)-1:
            temp_str_2 = str_formula_add[index + 1]
        if temp_str_1.isalpha() and temp_str_2.isnumeric():
            final_str_formula = final_str_formula + temp_str_1 + '*'
        else: 
            final_str_formula = final_str_formula + temp_str_1

    # Evaludating string expression and returning molecular mass to user
    molecular_mass = eval(final_str_formula)    
    return molecular_mass



In [8]:
# Test case of function (output should be 189.1659 )
mass = calc_mass_from_formula('C7H11NO5')
print(mass)

189.16593999999998


In [33]:
# # Parsing text file to obtain lists of chemical names, chemical formulas, and monoisotopic molecular weights


# # Parser variables: 
# # COMMON-Name - chemical name {}
# # INCHI - InChI=1S/chemicalFormula ....
# # MONOISOTOPIC-MW - weight {} 

# compounds_unparsed = open('A_Baumannii_compounds.txt','r')

# chemical_names_parse = []
# chemical_formula_parse = []
# monoisotopic_MW_parse = []
# counter = 0
# while True:
#     text = compounds_unparsed.readline()
#     #print(text)
#     if 'COMMON-NAME - ' in text:
#         chemical_name_split = text.split(' - ')
#         chemical_names_parse.append(chemical_name_split[1]) 
#         track = 0
#     if '1S/' in text: # 1S/ found before every checmical formula; added the 'track' counter because 1S/ appears twice for some compounds
#         if track == 0:
#             chemical_formula_split = text.split('/')
#             chemical_formula_parse.append(chemical_formula_split[1])
#             track = 1 # Shuts this off until a new chemical name is found
#     if 'MONOISOTOPIC-MW -' in text:
#         monoisotopic_MW_split = text.split(' - ')
#         monoisotopic_MW_parse.append(monoisotopic_MW_split[1])
        
#     #conter = counter + 1
#     #if conter%20 == 0:
#         #print(counter)

In [32]:
# Parsing data A_Baumannii file containing compounds

with open('A_Baumannii_compounds.txt') as f:
    counter = 0
    # common_name = []
    # inchi = []
    # mono_mw = []
    start = False
    compounds = []
    for line in f:
        if line.strip().split()[0] =='UNIQUE-ID':
            start = True
            compound = {'COMMON-NAME':'','INCHI':'', 'MONOISOTOPIC-MW':0}
        if line.strip() == '//':
            start = False
            compounds.append(list(compound.values()))
        if start:
            line = line.strip().split()
            if line[0] == 'COMMON-NAME':
                compound['COMMON-NAME'] = ''.join(line[2:])
            if line[0] == 'INCHI':
                compound['INCHI'] = line[2].split('/')[1]
            if line[0] == 'MONOISOTOPIC-MW':
                compound['MONOISOTOPIC-MW'] = line[2]
    print(len(compounds))                    
    print(compounds[0])

1216
['322.0123269261', 'C11H12Cl2N2O5', 'chloramphenicol']


In [58]:
# Create new dataframe for comparison
df_AB_mass_spec_mapping = df_AB_mass_spec_minimal.copy()
df_AB_mass_spec_mapping['mapped_metabs'] = ''
df_AB_mass_spec_mapping['mapped_MW'] = ''
df_AB_mass_spec_mapping

Unnamed: 0,Mass,Formula,Putative metabolite,mapped_metabs,mapped_MW
0,189.063754,C7H11NO5,N-Acetyl-L-glutamate,,
1,147.052987,C5H9NO4,L-Glutamate,,
2,175.095627,C6H13N3O3,L-Citrulline,,
3,221.089958,C8H15NO6,N-Acetyl-D-glucosamine,,
4,259.045942,C6H14NO8P,D-Glucosamine 6-phosphate,,
5,115.063317,C5H9NO2,L-Proline,,
6,131.094586,C6H13NO2,L-Leucine,,
7,117.078934,C5H11NO2,L-Valine,,
8,105.042676,C3H7NO3,L-Serine,,
9,384.122018,C14H20N6O5S,S-Adenosyl-L-homocysteine,,


In [60]:
# List of candidates 
# ppm = mass error / exact error * 10^6

for mass in df_AB_mass_spec_mapping['Mass']:
    ppm_error

189.06375369999998
147.0529866
175.09562659999997
221.08995819999998
259.0459418
115.06331670000002
131.09458590000003
117.07893429999999
105.04267649999998
384.1220184
89.04783641
149.0510017
165.0788867
130.0630956
116.047418
181.0738797
192.0270034
607.0817936000001
276.0246763
146.0215797
185.9929323
134.021592
169.99801619999997
118.0267115
116.0110328
88.0159012
167.9823322
342.11609460000005
196.0581771
663.1093179999999
172.0136875
123.0320818
363.0580183
482.98455199999995
151.04930900000002
324.0357815
322.05627139999996
522.9909251
404.0018922
111.04331940000002
323.05192969999996
347.06300439999995
152.03347030000003
307.0569005
136.0384421
443.0244488
168.0283351
188.0797178
188.1159925
148.037227
422.0824716
182.0789255
200.1776907
166.02662450000003
390.1744017
275.1008228
113.94464860000001
308.1585261
828.2747046000001
174.0163867
122.0368659
275.10011460000004
612.1522951000001
297.0898784
290.1227847
185.0088592
679.1028075
182.0579201
132.08974469999998
161.10515980