## RDF and OBO to NDEx and Back

This example uses pronto for parsing and generating OBO and RDF files.

In [1]:
#Uncomment the next line if you need to install the pronto module
#!pip install pronto
import ndex2
import pronto

In [2]:
# User name, password, and NDEx server host name; presumably consumed by
# later cells that talk to the server (not shown in this excerpt).
# NOTE(review): the password is hard-coded in the notebook -- consider reading
# it from an environment variable or a prompt before sharing this file.
my_username = 'drh'
my_password = 'drh'
my_ndex_server = 'public.ndexbio.org'

Load the Protein Modification Ontology (PSI-MOD) from an OBO file.

In [4]:
print("LOADING PSI-MOD...")
# Parse the local PSI-MOD OBO file into a pronto Ontology object.
ontology = pronto.Ontology("../resources/PSI-MOD.obo")
# Alternative source (disabled): the GO "go-basic" ontology via its OBO PURL.
#ontology = pronto.Ontology("http://purl.obolibrary.org/obo/go/go-basic.obo")
#clear_output()
print("DONE!")

LOADING PSI-MOD...
DONE!


In [5]:

def create_term_map(annotation_file_path):
    """Build a map from GO term id to the gene symbols annotated to it.

    The annotation file is read as header-less, tab-separated text. Lines
    that start with ``#`` are skipped. Columns are interpreted positionally
    as: id_type, id, gene_symbol, x, go_id, reference_id, evidence_type.
    Rows with an empty or missing gene symbol or GO id are ignored.

    Args:
        annotation_file_path: Path to the tab-separated annotation file.

    Returns:
        A dict keyed by GO term id. Each value is a dict with two entries:
        ``"my_genes"`` (the unique gene symbols annotated to the term, in
        first-seen order) and ``"term_ids"`` (a one-element list holding the
        term id itself).
    """
    # Imported locally so the function does not depend on another notebook
    # cell having imported csv.
    import csv

    header = ["id_type",
              "id",
              "gene_symbol",
              "x",
              "go_id",
              "reference_id",
              "evidence_type"]

    term_map = {}

    # Each row in the gene annotation file maps a gene to a GO term
    # plus data about the gene and the source of the annotation.
    # We populate the inverse map: for each GO term id, its gene symbols.

    # newline='' lets the csv module handle line endings itself; the old
    # 'rU' mode is no longer accepted by open() on current Python 3.
    with open(annotation_file_path, newline='') as tsvfile:
        data_lines = (line for line in tsvfile if not line.startswith('#'))
        reader = csv.DictReader(data_lines, dialect='excel-tab', fieldnames=header)
        for row in reader:
            gene_symbol = row.get("gene_symbol")
            term_id = row.get("go_id")
            if not (gene_symbol and term_id):
                continue

            term_attributes = term_map.get(term_id)
            if term_attributes is None:
                term_attributes = {"my_genes": [], "term_ids": [term_id]}
                term_map[term_id] = term_attributes
            term_attributes["my_genes"].append(gene_symbol)

    # Remove duplicates while keeping first-seen order, so the result is
    # deterministic from run to run.
    for attributes in term_map.values():
        attributes["my_genes"] = list(dict.fromkeys(attributes["my_genes"]))

    # Note: the annotations may cover only a subset of the ontology, and this
    # function does not attach them to ontology terms; it only builds the map.

    print(str(len(term_map)) + " terms annotated in term_map")
    return term_map

def ontology2NiceCX(ontology, term_map, root_term_id):
    """Build a network from the ontology terms reachable from a root term.

    Nodes are created by ``add_nodes`` and edges by ``add_edges``, both
    starting from the root term. Progress messages are printed along the way.

    Args:
        ontology: Mapping-like object indexed by term id.
        term_map: Dict of per-term attributes keyed by term id, passed
            through to ``add_nodes``.
        root_term_id: Id of the term to start the traversal from.

    Returns:
        The populated network object.

    Raises:
        ValueError: If the lookup of ``root_term_id`` yields a falsy value.
            Any exception raised by ``ontology[root_term_id]`` itself
            propagates unchanged.
    """
    root = ontology[root_term_id]
    if not root:
        # Raising a plain string is itself a TypeError and would hide this
        # message, so raise a real exception instead.
        raise ValueError("cannot find root term by id " + str(root_term_id))

    term_id_to_node_id_map = {}
    # NOTE(review): only `import ndex2` is visible in this notebook excerpt;
    # confirm that NiceCXNetwork is imported into scope by another cell.
    G = NiceCXNetwork()

    print("adding nodes")
    # create all the nodes under root in ontology and add attributes, if any
    add_nodes(root, G, term_map, term_id_to_node_id_map)

    print("added " + str(len(term_id_to_node_id_map)) + " nodes")
    print("network now has  " + str(len(G.nodes())) + " nodes")

    print("adding edges")
    add_edges(root, G, term_id_to_node_id_map)
    print("network now has  " + str(len(G.edges())) + " edges")
    return G

def add_new_node(network, node_id, att_dict):
    """Look up a node and add one if the lookup result is falsy.

    NOTE(review): this function looks unfinished. As written it:
      * reads ``node_name`` and ``name``, neither of which is a parameter or
        a local here -- unless they exist as notebook globals, calling this
        raises NameError;
      * calls ``network_get_node_by_name`` and ``network_add_node``, which are
        not defined in the visible part of the notebook;
      * never uses ``node_id`` or ``att_dict``;
      * has no return statement, so it returns None.
    """
    # Presumably meant to find an existing node by its name -- TODO confirm.
    node = network_get_node_by_name(network, node_name)
    if not node:
        # Presumably meant to create the missing node -- TODO confirm.
        node = network_add_node(name)
    
def add_nodes(parent_term, network, term_map, term_id_to_node_id_map):
    """Recursively add a node for ``parent_term`` and its eligible descendants.

    A term is skipped, and its children are not visited through it, when any
    of the following holds:
      * it already has an entry in ``term_id_to_node_id_map``;
      * it has no entry in ``term_map``;
      * its attributes have no ``"genes"`` entry, or that entry is empty.

    Args:
        parent_term: Term object exposing ``id``, ``name`` and ``children``.
        network: Object exposing ``add_node(name, attributes)`` whose return
            value provides ``get_id()``.
        term_map: Dict of per-term attribute dicts keyed by term id.
        term_id_to_node_id_map: Dict updated in place with term id -> node id
            for every node added.

    Returns:
        None. Results are recorded in ``network`` and
        ``term_id_to_node_id_map``.
    """
    term_id = parent_term.id

    # check to see if this term has already been added to the id->node_id map
    if term_id in term_id_to_node_id_map:
        return

    # only traverse nodes in the term_map
    if term_id not in term_map:
        return

    attributes = term_map[term_id]

    # don't include this term if it has no propagated or directly annotated
    # genes (compare by value: `is 0` depends on interpreter int caching)
    if "genes" not in attributes or len(attributes["genes"]) == 0:
        return

    att_dict = {"represents": term_id}
    # prune empty lists from attributes
    for att, val in attributes.items():
        if isinstance(val, list) and len(val) == 0:
            continue
        att_dict[att] = val

    node = network.add_node(parent_term.name, att_dict)
    term_id_to_node_id_map[term_id] = node.get_id()

    for child_term in parent_term.children:
        add_nodes(child_term, network, term_map, term_id_to_node_id_map)

def add_edges(parent_term, network, term_id_to_node_id_map, edge_ids=None):
    """Recursively add a "hasParent" edge from each child node to its parent.

    Only terms with an entry in ``term_id_to_node_id_map`` are considered.
    A child that maps to the same node as its parent is reported as a self
    loop and not followed. An edge is created only when
    ``network.number_of_edges(child, parent)`` reports none yet.

    Args:
        parent_term: Term object exposing ``id``, ``name`` and ``children``.
        network: Object exposing ``number_of_edges(source, target)`` and
            ``create_edge(id=..., edge_source=..., edge_target=...,
            edge_interaction=...)``.
        term_id_to_node_id_map: Dict mapping term id -> node id.
        edge_ids: Optional iterator yielding the id for each new edge. It is
            shared across the recursion so every edge gets a distinct id.
            Callers normally omit it; ids then start at 1.

    Returns:
        None. Edges are recorded in ``network``.
    """
    if edge_ids is None:
        # One counter for the whole traversal. A per-call counter restarted
        # at 1 in every recursive call, giving every edge the same id.
        import itertools
        edge_ids = itertools.count(1)

    if parent_term.id not in term_id_to_node_id_map:
        return

    parent_node_id = term_id_to_node_id_map.get(parent_term.id)
    for child_term in parent_term.children:
        if child_term.id not in term_id_to_node_id_map:
            continue

        child_node_id = term_id_to_node_id_map.get(child_term.id)
        if child_node_id == parent_node_id:
            print("self loop : " + parent_term.name)
            continue

        edge_count = network.number_of_edges(child_node_id, parent_node_id)
        if edge_count == 0:
            network.create_edge(id=next(edge_ids),
                                edge_source=child_node_id,
                                edge_target=parent_node_id,
                                edge_interaction="hasParent")
        add_edges(child_term, network, term_id_to_node_id_map, edge_ids)
