# iCardio data assembly for compartments

### Setup

In [2]:
import xml.dom.minidom
import pandas as pd
import cobra
import json
import numpy as np

In [3]:
# Parse the model XML into a DOM tree. The path is relative, so "icardio.xml"
# is resolved against the current working directory.
# NOTE(review): presumably an SBML export of the iCardio model -- confirm.
file = xml.dom.minidom.parse("icardio.xml")

### Dataframe

#### This dataframe maps each species ID to its name and compartment

In [5]:
# Root element of the parsed document; each <species> element found below it
# becomes one row of the lookup table.
model = file.documentElement
species = model.getElementsByTagName("species")

# Pull the three attributes of interest out column by column.
species_names = [node.getAttribute("name") for node in species]
species_groups = [node.getAttribute("compartment") for node in species]
species_ids = [node.getAttribute("id") for node in species]

# Rows are labelled 1..N rather than with the default 0-based index.
dataframe = pd.DataFrame(
    {"names": species_names, "groups": species_groups, "ids": species_ids},
    index=np.arange(1, len(species_ids) + 1),
)
dataframe

Unnamed: 0,names,groups,ids
1,(10Z)-heptadecenoic acid,c,m00003__91__c__93__
2,(10Z)-heptadecenoic acid,r,m00003__91__r__93__
3,(10Z)-heptadecenoic acid,s,m00003__91__s__93__
4,(10Z)-heptadecenoyl-CoA,c,m00004__91__c__93__
5,(10Z)-heptadecenoyl-CoA,m,m00004__91__m__93__
...,...,...,...
2886,hyocholoyl-CoA,c,m90185__91__c__93__
2887,protein,c,m90190__91__c__93__
2888,glycogen storage pool,c,m90194__91__c__93__
2889,intracellular lipids,c,m90195__91__c__93__


### Secondary Dataframe

#### A list of reaction records links each reaction to its species (separated into reactants and products). The records are also arranged in a format suitable for export.

In [6]:
# Build one record per <reaction>: its id plus the species it consumes and
# produces, each annotated with the name and compartment found in `dataframe`.

# A single pass over the species table gives constant-time id lookups instead
# of re-scanning the dataframe for every species reference. setdefault keeps
# the first row seen for an id, matching a "first match" lookup.
species_name_by_id = {}
species_group_by_id = {}
for row_id, row_name, row_group in zip(
    dataframe["ids"], dataframe["names"], dataframe["groups"]
):
    species_name_by_id.setdefault(row_id, row_name)
    species_group_by_id.setdefault(row_id, row_group)


def _describe_participants(container):
    """Return the species referenced inside one reactant/product list element.

    Parameters
    ----------
    container : xml.dom.minidom.Element
        A ``listOfReactants`` or ``listOfProducts`` element.

    Returns
    -------
    list of dict
        One ``{"id", "name", "group"}`` dict per ``speciesReference`` child,
        in document order.

    Raises
    ------
    KeyError
        If a referenced species id is not present in the species table.
    """
    participants = []
    for reference in container.getElementsByTagName("speciesReference"):
        referenced_id = reference.getAttribute("species")
        participants.append({"id": referenced_id,
                             "name": species_name_by_id[referenced_id],
                             "group": species_group_by_id[referenced_id]})
    return participants


list_of_reactions = []
reactions = model.getElementsByTagName("reaction")

for reaction in reactions:
    # The record id is read from the element's "name" attribute.
    this_reaction = {"id": reaction.getAttribute("name")}
    # A key is only created when the corresponding list element exists, so
    # consumers can test e.g. `"Products" in record`.
    for reactant_list in reaction.getElementsByTagName("listOfReactants"):
        this_reaction["Reactants"] = _describe_participants(reactant_list)
    for product_list in reaction.getElementsByTagName("listOfProducts"):
        this_reaction["Products"] = _describe_participants(product_list)
    list_of_reactions.append(this_reaction)

list_of_reactions[0:5]

[{'id': 'RCR10001',
  'Reactants': [{'id': 'm01285__91__c__93__', 'name': 'ADP', 'group': 'c'}],
  'Products': [{'id': 'm01334__91__c__93__', 'name': 'AMP', 'group': 'c'},
   {'id': 'm01371__91__c__93__', 'name': 'ATP', 'group': 'c'}]},
 {'id': 'RCR10002',
  'Reactants': [{'id': 'm01252__91__c__93__', 'name': 'acetate', 'group': 'c'},
   {'id': 'm01371__91__c__93__', 'name': 'ATP', 'group': 'c'},
   {'id': 'm01597__91__c__93__', 'name': 'CoA', 'group': 'c'}],
  'Products': [{'id': 'm01261__91__c__93__',
    'name': 'acetyl-CoA',
    'group': 'c'},
   {'id': 'm01334__91__c__93__', 'name': 'AMP', 'group': 'c'},
   {'id': 'm02759__91__c__93__', 'name': 'PPi', 'group': 'c'}]},
 {'id': 'RCR10004',
  'Reactants': [{'id': 'm01285__91__m__93__', 'name': 'ADP', 'group': 'm'}],
  'Products': [{'id': 'm01334__91__m__93__', 'name': 'AMP', 'group': 'm'},
   {'id': 'm01371__91__m__93__', 'name': 'ATP', 'group': 'm'}]},
 {'id': 'RCR10005',
  'Reactants': [{'id': 'm01369__91__c__93__',
    'name': 'as

### Selector

#### The selector filters nodes and links by compartment (called a "group" in the code). The result is also organised into a format suitable for export.

In [98]:
# Compartment ("group") whose species and reactions are extracted.
selector = "n"

nodes = []
links = []
pseudo_nodes_s = []
pseudo_nodes_e = []
selected_list_of_reactions = []

# 1) Every species that lives in the selected compartment becomes a node.
#    The compartment is already available in species_groups, so no dataframe
#    lookup is needed per species.
for species_id, species_name, species_group in zip(species_ids, species_names, species_groups):
    if species_group == selector:
        nodes.append({"id": species_id, "name": species_name, "group": species_group})

# 2) Keep each reaction that has at least one reactant or product in the
#    selected compartment. Either list may be absent from a record, so both
#    are read with a default instead of assuming "Reactants" always exists.
for reaction in list_of_reactions:
    participants = reaction.get("Reactants", []) + reaction.get("Products", [])
    touches_selector = any(entry["group"] == selector for entry in participants)
    if touches_selector and reaction not in selected_list_of_reactions:
        selected_list_of_reactions.append(reaction)

# 3) Each selected reaction that has products is drawn as
#    reactants -> "<id>s" -> "<id>e" -> products, where the two pseudo nodes
#    (group "z") stand for the start and end of the reaction.
#    Species from other compartments that take part are added as nodes once.
known_node_ids = {node["id"] for node in nodes}
for reaction in selected_list_of_reactions:
    if "Products" not in reaction:
        # Reactions without a product list are not drawn.
        continue
    start_id = reaction["id"] + "s"
    end_id = reaction["id"] + "e"
    pseudo_nodes_s.append({"id": start_id, "name": start_id, "group": "z"})
    pseudo_nodes_e.append({"id": end_id, "name": end_id, "group": "z"})
    for reactant in reaction.get("Reactants", []):
        links.append({"source": reactant["id"], "target": start_id, "arrow": False})
        if reactant["id"] not in known_node_ids:
            known_node_ids.add(reactant["id"])
            nodes.append(reactant)
    for product in reaction["Products"]:
        links.append({"source": end_id, "target": product["id"], "arrow": True})
        if product["id"] not in known_node_ids:
            known_node_ids.add(product["id"])
            nodes.append(product)

# 4) Join the start and end pseudo node of every drawn reaction; the two
#    lists were filled in lockstep, so they pair up positionally.
for start_node, end_node in zip(pseudo_nodes_s, pseudo_nodes_e):
    links.append({"source": start_node["id"], "target": end_node["id"], "arrow": False})

all_nodes = nodes + pseudo_nodes_s + pseudo_nodes_e

### Collate data into format suitable for JSON export and further use

In [99]:
# Bundle the graph for export. Fresh list copies are stored, so later changes
# to the lists inside this dict do not alter `all_nodes` or `links`.
icardio_n_data = {"nodes": list(all_nodes), "links": list(links)}

### Export

In [7]:
# Compact JSON text of the graph. `icardio_n_data` is created by the
# collation cell, which therefore has to run before this one.
json_string = json.dumps(icardio_n_data)

# Write an indented, non-ASCII-escaped copy of the same data to disk.
with open('icardio_n_data.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(icardio_n_data, ensure_ascii=False, indent=4))

NameError: name 'icardio_n_data' is not defined