# CSCI 3352 Project

**Names:** Tom Stoughton, Dylan Smith

Summary: ...

In [114]:
# network/comp. bio. packages
import networkx as nx
import cobra
from cobra.io import read_sbml_model
import netwulf
from netwulf import visualize

# helper packages
import os
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

%matplotlib inline

The following function is used to generate a metabolic interaction network from the SBML model of different genomes in the Mycoplasma genus.

In [26]:
# set up networks here

# iterate over model.reactions and model.metabolites to construct bipartite directed network
def makeNetworkFromSBML(model):
    G = nx.DiGraph()
    nodes_m = {}
    nodes_r = {}

    for metabolite in model.metabolites:
        nodes_m[metabolite.id] = metabolite.name
        
    for reaction in model.reactions:
        nodes_r[reaction.id] = reaction.name
    
    # add nodes with metabolite formula/enzyme name as attribute for id
    G.add_nodes_from([(m_id, {'name': name}) for (m_id, name) in nodes_m.items()], bipartite=0)
    G.add_nodes_from([(r_id, {'name': name}) for (r_id, name) in nodes_r.items()], bipartite=1)
    
    edges = []
    for reaction in model.reactions:
        # get products and reactants
        products  = reaction.products
        reactants = reaction.reactants
        enzyme = reaction.name
        rid = reaction.id
        
        for p in products:
            edges.append((rid, p.id))
            if reaction.reversibility:
                edges.append((p.id, rid))
                
        for r in reactants:
            edges.append((r.id, rid))
            if reaction.reversibility:
                edges.append((rid, r.id))
            
        
    G.add_edges_from(edges)
    return G


In [27]:
# create list of metabolic networks, one for each genome
networks = {}
models = {}
sbml_dir = 'sbml_files'
for sbml_file in os.listdir(sbml_dir):
    # print(sbml_file)
    model = read_sbml_model(os.path.join(sbml_dir, sbml_file))
    models[sbml_file] = model
    networks[sbml_file] = makeNetworkFromSBML(model)

In [81]:
# for f, n in networks.items():
#     print(f,':', nx.get_node_attributes(n, "name"))

## Create a network of enzymes. 

*This method will connect two enzymes with a weighted directed edge ($i$ &rarr; $j$, $w$) where the weight $w$ of the edge is the number of products of enzyme $i$ that are the reactants of enzyme $j$.*

### Step 1: 
Create a list of all the enzymes.

In [64]:
enzymes = set()

for model in models:
    enzymes.update(set(r.id for r in model.reactions))

### Step 2:

Take one enzyme, and iterate through every metabolite that is a product of that enzyme (in any network, not that I expect a reaction to produce something different in different networks), and find reactions that have said metabolite as a reactant.

In [122]:
def get_enzyme_dict(enzymes, networks):
    enzyme_dict = {}
    
    for e in enzymes:
    
        # run through each network, accumulate products of enzyme reaction
        products = set()
        matches = []

        for G in networks.values():
            if e in G.nodes:
                ps = G[e]
                products.update(ps)

                # print([G[p] for p in list(ps.keys())])

                # get enzymes that are pointed to by product
                for p in list(ps.keys()):
                    for n in G[p]:
                        matches.append(n)
                        
        # convert to a dictionary counting the number of each entry
        matches = dict(Counter(matches))
        
        # dont self-loop
        if e in matches:
            del matches[e]

        enzyme_dict[e] = matches
    
    return enzyme_dict

In [137]:
get_enzyme_dict(enzymes, networks)

{'rxn12977_c': {'rxn09464_c': 22,
  'rxn33907_c': 22,
  'rxn14120_c': 22,
  'rxn13341_c': 22,
  'cofactor_rxn': 22,
  'rxn16350_c': 22,
  'rxn16505_c': 22,
  'rxn00183_c': 22,
  'rxn19217_c': 22,
  'rxn09469_c': 22,
  'rxn30524_c': 22,
  'rxn09467_c': 22,
  'rxn10770_c': 22,
  'rxn08094_c': 33,
  'rxn05747_c': 22,
  'rxn00078_c': 11,
  'rxn11546_c': 11,
  'rxn10030_c': 11,
  'rxn04343_c': 11,
  'rxn00305_c': 11,
  'rxn00114_c': 22,
  'rxn18920_c': 11,
  'rxn10665_c': 11,
  'rxn15626_c': 11,
  'rxn28196_c': 11,
  'rxn42899_c': 11,
  'rxn41598_c': 11,
  'rxn20288_c': 11,
  'rxn19870_c': 11,
  'rxn34108_c': 11,
  'EX_cpd00033_e': 11,
  'EX_cpd00087_e': 11,
  'rxn12868_c': 11,
  'rxn00105_c': 11,
  'rxn11305_c': 11,
  'rxn47591_c': 11,
  'rxn16003_c': 5,
  'rxn39930_c': 12},
 'rxn21635_c': {'rxn00124_c': 22,
  'biomass_GmNeg': 11,
  'rxn28196_c': 11,
  'rxn09464_c': 22,
  'rxn00121_c': 11,
  'rxn33907_c': 22,
  'rxn13341_c': 22,
  'rxn08815_c': 22,
  'rxn13724_c': 11,
  'rxn03028_c': 11,
 

### Step 3:

Create the network from this dictionary by formatting it in a way where the number of edges from enzyme $e1$ to a product to another enzyme $e2$ is the weight of that edge.

In [133]:
def create_enzyme_network(enzymes, networks):
    G = nx.DiGraph()
    G.add_nodes_from(enzymes)
    d = get_enzyme_dict(enzymes, networks)
    
    # iterate through keys, then format the corresponding dictionary
    # to be an input for nx.add_weighted_edges_from
    for e in d.keys():
        neighbors = d[e]
        # print(neighbors)
        edge_list = [(e, n, w) for n, w in neighbors.items()]
        G.add_weighted_edges_from(edge_list)
    print(G)

In [134]:
create_enzyme_network(enzymes, networks)

DiGraph with 340 nodes and 0 edges
DiGraph with 340 nodes and 22786 edges


In [60]:
# check if reaction IDs are consistent throughout models
# rs = []
# for m in models:
#     rs.append(m.reactions)
    
# for r1 in rs:
#     for r2 in rs:
#         if r1 != r2:
#             for reaction in r1:
#                 rid = reaction.id
#                 reaction2 = r2.query(lambda x: x.id == rid)
#                 if reaction2:
#                     if reaction.name != reaction2[0].name:
#                         print("These names don't align!")
#                         print(rid, rid2)
#                         print(reaction.name, reaction2.name)
#                         print(r1)
#                         print(r2)

# print("Done!")
            
# Confirms that IDs are consistent throughout models, 
# i.e. if two IDs match from different models, they have the same name/formula

Done!


In [None]:
# clustering analysis