In [1]:
import libsbml as sb # SBML integration
import numpy as np # for making arrays
import pandas as pd # for dataframe conversion

In [2]:
# pulling data from the SBML file
# The path is kept in one named variable so it can be reported in the error below.
sbmlpath = "C:/Users/camer/OneDrive - The University of Chicago/College/Classes/BIOS 26211/final project/iJO1366 E coli model/msb201165-sup-0003.xml"
reader = sb.SBMLReader() # open the reader
document = reader.readSBMLFromFile(sbmlpath) # read the file into document
model = document.getModel() # and extract the model from the file
if model is None:
    # Without a model every later cell would fail with an AttributeError on None,
    # so stop here with a message that points at the actual cause.
    raise IOError(
        f"Could not load an SBML model from {sbmlpath!r}: "
        f"libSBML reported {document.getNumErrors()} problem(s) while reading the file."
    )

In [3]:
## Making Connectivity Matrix
#
# Builds `connect`, an N x N matrix (N = number of species entries in the model)
# where connect[i,j] = connect[j,i] = 1 if the species with speciesIDs indices
# i and j take part in the same reaction (as reactant or product); otherwise 0.

# setting up
specieslist = model.getListOfSpecies() # get a list of all the species
rxnslist = model.getListOfReactions() # and of all the reactions
# Both counts are read from the model itself instead of being hard-coded or derived
# from a generic element listing, so the loop bounds always match the loaded file.
numspecies = model.getNumSpecies() # number of species (non-unique; if 1 species is in two compartments it is recorded twice with 2 different SIDs)
numrxns = model.getNumReactions() # number of reactions (similarly non-unique)
speciesIDs = np.empty(numspecies,dtype=object) # prepare an array that stores only the IDs of every species
for s in range(numspecies):
    speciesIDs[s] = specieslist.get(s).getId() # and add in all the IDs

# Map each species ID to its speciesIDs index once, so looking up a reaction
# participant is a dictionary access rather than a scan of the whole ID array.
# setdefault keeps the first index should an ID ever occur more than once.
speciesindexof = {}
for idx, sid in enumerate(speciesIDs):
    speciesindexof.setdefault(sid, idx)

connect = np.zeros((numspecies,numspecies)) # N x N connectivity matrix, all zeros until links are recorded
for r in range(numrxns): # for every reaction
    rxn = rxnslist.get(r)
    numreacts = rxn.getNumReactants() # find the number of reactants
    numprods = rxn.getNumProducts() # and products
    # species IDs of everything involved: reactants first, then products
    participants = [rxn.getReactant(i).getSpecies() for i in range(numreacts)]
    participants += [rxn.getProduct(i).getSpecies() for i in range(numprods)]
    # translate the IDs into speciesIDs indices (raises KeyError naming the ID if the species is not in the model's species list)
    substrates = np.array([speciesindexof[sid] for sid in participants],dtype=int)
    # Link every participant to every participant at a different position in the list.
    # With fewer than two participants the loops do nothing. A species listed more than
    # once in the same reaction is linked to itself (a diagonal entry).
    for i, source in enumerate(substrates):
        for target in np.delete(substrates,i):
            connect[source,target] = 1 # put a '1' in the linkage cell corresponding to the two linked substrates

In [4]:
# Wrap the connectivity matrix in a DataFrame so pandas can take care of writing it out.
connectpd = pd.DataFrame(data=connect)
# Write the matrix to disk as CSV (default settings: index column and header row included).
connectpd.to_csv(
    path_or_buf="C:/Users/camer/OneDrive - The University of Chicago/College/Classes/BIOS 26211/final project/iJO1366 E coli model/connectivity.csv"
)