In [1]:
# Importing packages
import re
import string

import pandas as pd
from matplotlib import pyplot as plt

# COBRA toolbox specific packages
import cobra
from cobra import Model, Reaction, Metabolite
from cobra.flux_analysis import (
    single_gene_deletion, single_reaction_deletion, double_gene_deletion,
    double_reaction_deletion)

In [2]:
# Read the raw mass-spec export into a dataframe
df_AB_mass_spec_data = pd.read_csv("Acinetobacter_baumannii_mass_spec.csv")

# Keep the label-sliced run of columns from 'Mass' through 'Max intensity' (inclusive)
df_AB_mass_spec_data_important_features = (
    df_AB_mass_spec_data.loc[:, 'Mass':'Max intensity']
)

# Minimal view: mass, chemical formula, and putative metabolite name
df_AB_mass_spec_minimal = df_AB_mass_spec_data_important_features[
    ['Mass', 'Formula', 'Putative metabolite']
]
df_AB_mass_spec_minimal.head()

Unnamed: 0,Mass,Formula,Putative metabolite
0,189.063754,C7H11NO5,N-Acetyl-L-glutamate
1,147.052987,C5H9NO4,L-Glutamate
2,175.095627,C6H13N3O3,L-Citrulline
3,221.089958,C8H15NO6,N-Acetyl-D-glucosamine
4,259.045942,C6H14NO8P,D-Glucosamine 6-phosphate


In [128]:
# Load the Acinetobacter baumannii network reconstruction 'iCN718' from its JSON file
AB_model = cobra.io.load_json_model('iCN718.json')

In [98]:
# Function that calculates molar mass from a chemical formula.

# Average atomic masses (g/mol) keyed by element symbol. These are average
# values, NOT monoisotopic masses: e.g. C7H11NO5 evaluates to ~189.1659 here,
# whereas the mass-spec table lists 189.063754 for the same formula.
AVERAGE_ATOMIC_MASSES = {
    'C': 12.0107,    # +/- 0.0008
    'H': 1.00794,    # +/- 0.00001
    'N': 14.0067,    # +/- 0.0001
    'O': 15.9994,
    'P': 30.973762,  # +/- 0.000002
    'S': 32.065,     # +/- 0.005
}

# One formula token: an element symbol (capital letter plus an optional
# lowercase letter) followed by an optional ASCII integer count.
_FORMULA_TOKEN = re.compile(r'([A-Z][a-z]?)([0-9]*)')


def calc_mass_from_formula(formula, atomic_masses=None):
    """Return the molar mass of a flat chemical formula such as 'C7H11NO5'.

    The formula is read left to right as a sequence of element symbols, each
    optionally followed by an integer count (a missing count means 1).
    Whitespace is ignored. Parentheses, charges, and hydrate dots are not
    supported and are rejected rather than silently dropped.

    Parameters
    ----------
    formula : str
        Chemical formula, e.g. 'C7H11NO5' or 'H2O'.
    atomic_masses : dict, optional
        Extra or overriding ``{symbol: mass}`` entries merged on top of
        ``AVERAGE_ATOMIC_MASSES`` (e.g. ``{'Na': 22.98976928}``).

    Returns
    -------
    float
        Sum of ``mass * count`` over all tokens, accumulated in formula order.

    Raises
    ------
    TypeError
        If ``formula`` is not a string.
    ValueError
        If the formula is empty, contains a character that cannot start an
        element token, or names an element with no known mass.
    """
    if not isinstance(formula, str):
        raise TypeError(
            'formula must be a string, got %s' % type(formula).__name__)

    masses = dict(AVERAGE_ATOMIC_MASSES)
    if atomic_masses:
        masses.update(atomic_masses)

    # Drop all whitespace so 'C7 H11 N O5' and 'C7H11NO5' are equivalent.
    compact = ''.join(formula.split())
    if not compact:
        raise ValueError('formula is empty')

    molecular_mass = 0.0
    position = 0
    while position < len(compact):
        token = _FORMULA_TOKEN.match(compact, position)
        if token is None:
            # Digit before any element, lowercase start, parenthesis, charge sign, ...
            raise ValueError(
                'unexpected character %r at position %d in formula %r'
                % (compact[position], position, formula))

        symbol, count_text = token.groups()
        if symbol not in masses:
            raise ValueError(
                'unknown element %r in formula %r' % (symbol, formula))

        count = int(count_text) if count_text else 1
        molecular_mass += masses[symbol] * count
        position = token.end()

    return molecular_mass



In [103]:
# Test case of function: C7H11NO5 (N-Acetyl-L-glutamate) should give ~189.1659
# with the average atomic masses used by calc_mass_from_formula.
mass = calc_mass_from_formula('C7H11NO5')
assert abs(mass - 189.16594) < 1e-6, mass  # fail loudly if the function regresses
mass

189.16593999999998


In [153]:
# Open the compounds flat file in text read mode; presumably parsed by later cells.
# NOTE(review): the handle is not closed in this cell — confirm a later cell
# closes it, or switch to a `with` block once the downstream usage is known.
compounds_unparsed = open('A_Baumannii_compounds.txt', 'r') 

# 
#
# Authors:
#    Pallavi Subhraveti
#    Quang Ong
#    Ingrid Keseler
#    Tim Holland
#    Anamika Kothari
#    Ron Caspi
#    Peter D Karp
#
# Please see the license agreement regarding the use of and distribution of this file.
# The format of this file is defined at http://bioinformatics.ai.sri.com/ptools/flatfile-format.html .
#
# Species: Acinetobacter baumannii ATCC 19606 = CIP 70.34
# Database: Acinetobacter baumannii ATCC 19606, whole genome shotgun sequencing project.
# Version: 18.0
# File Name: compounds.dat
# Date and time generated: October 28, 2014, 23:00:20
#
# Attributes:
#    UNIQUE-ID
#    TYPES
#    COMMON-NAME
#    ABBREV-NAME
#    ANTICODON
#    ATOM-CHARGES
#    CATALYZES
#    CFG-ICON-COLOR
#    CHEMICAL-FORMULA
#    CITATIONS
#    CODONS
#    COFACTORS-OF
#    COFACTORS-OR-PROSTHETIC-GROUPS-OF
#    COMMENT
#    COMMENT-INTERNAL
#    COMPONENT-COEFFICIENTS
#    COMPONENT-OF
#    COMPONENTS
#    CONSENSUS-SEQUENCE
#    CREDITS
#    DATA-SOURCE
#    DBLINKS
# 