In [1]:
import networkx
import obonet

In [2]:
%%time
url = 'http://current.geneontology.org/ontology/go-basic.obo'
graph = obonet.read_obo(url)

Wall time: 1min 22s


In [3]:
# Count the nodes (one per ontology term) held in the graph
graph.number_of_nodes()

44945

In [4]:
# Number of edges: each edge links two terms and is keyed by its
# relationship type (e.g. is_a, part_of — see the edge listings further down)
graph.number_of_edges()

92715

In [5]:
# Check if the ontology is a DAG (directed, with no cycles)
networkx.is_directed_acyclic_graph(graph)

True

In [6]:
# Retrieve properties of regulation of interleukin-3 biosynthetic process.
# Uses `graph.nodes[...]`: the older `graph.node` alias was removed in
# NetworkX 2.4 and raises AttributeError there.
graph.nodes['GO:0045399']

{'name': 'regulation of interleukin-3 biosynthetic process',
 'namespace': 'biological_process',
 'def': '"Any process that modulates the frequency, rate or extent of the chemical reactions and pathways resulting in the formation of interleukin-3." [GOC:go_curators]',
 'synonym': ['"regulation of IL-3 biosynthesis" EXACT []',
  '"regulation of IL-3 biosynthetic process" EXACT []',
  '"regulation of interleukin-3 anabolism" EXACT []',
  '"regulation of interleukin-3 biosynthesis" EXACT []',
  '"regulation of interleukin-3 formation" EXACT []',
  '"regulation of interleukin-3 synthesis" EXACT []'],
 'is_a': ['GO:0032672', 'GO:0042035'],
 'relationship': ['regulates GO:0042223']}

In [7]:
# Retrieve properties of pilus shaft.
# Uses `graph.nodes[...]`: the older `graph.node` alias was removed in
# NetworkX 2.4 and raises AttributeError there.
graph.nodes['GO:0009418']

{'name': 'pilus shaft',
 'namespace': 'cellular_component',
 'def': '"The long, slender, mid section of a pilus." [GOC:jl]',
 'synonym': ['"fimbrial shaft" EXACT []'],
 'is_a': ['GO:0044443']}

In [8]:
# Lookup tables between term identifiers and term names.
# id_to_name keeps every node (value is None when a node has no 'name');
# name_to_id only includes nodes that actually carry a 'name' attribute.
id_to_name = {
    term_id: attrs.get('name')
    for term_id, attrs in graph.nodes.items()
}
name_to_id = {
    attrs['name']: term_id
    for term_id, attrs in graph.nodes.items()
    if 'name' in attrs
}

In [9]:
# Get the name for GO:0042552 (raises KeyError if the id is not in the graph)
id_to_name['GO:0042552']

'myelination'

In [10]:
# Get the id for myelination (raises KeyError if no term has this exact name)
name_to_id['myelination']

'GO:0042552'

In [11]:
# List the parent terms of "pilus part".
# Outgoing edges run from a term to its parents; the edge key is the
# relationship type.
node = name_to_id['pilus part']
for child, parent, key in graph.out_edges(node, keys=True):
    child_name, parent_name = id_to_name[child], id_to_name[parent]
    print(f'• {child_name} ⟶ {key} ⟶ {parent_name}')

• pilus part ⟶ is_a ⟶ intracellular organelle part
• pilus part ⟶ is_a ⟶ cell projection part
• pilus part ⟶ part_of ⟶ pilus


In [12]:
# List the child terms of "pilus part".
# Incoming edges arrive from a term's children; the edge key is the
# relationship type.
node = name_to_id['pilus part']
for parent, child, key in graph.in_edges(node, keys=True):
    target_name, source_name = id_to_name[child], id_to_name[parent]
    print(f'• {target_name} ⟵ {key} ⟵ {source_name}')

• pilus part ⟵ is_a ⟵ pilus shaft
• pilus part ⟵ is_a ⟵ pilus tip


In [13]:
# All superterms of GO:0042552. Edges point from a term to its parents, so
# the graph-theoretic *descendants* of a node are its ontology superterms.
superterm_ids = networkx.descendants(graph, 'GO:0042552')
sorted(id_to_name[term_id] for term_id in superterm_ids)

['anatomical structure development',
 'axon ensheathment',
 'biological_process',
 'cellular process',
 'developmental process',
 'ensheathment of neurons',
 'multicellular organism development',
 'multicellular organismal process',
 'nervous system development',
 'system development']

In [14]:
# All subterms of GO:0042552. Edges point from a term to its parents, so
# the graph-theoretic *ancestors* of a node are its ontology subterms.
subterm_ids = networkx.ancestors(graph, 'GO:0042552')
sorted(id_to_name[term_id] for term_id in subterm_ids)

['central nervous system myelin formation',
 'central nervous system myelin maintenance',
 'central nervous system myelination',
 'myelin assembly',
 'myelin maintenance',
 'myelination in peripheral nervous system',
 'myelination of anterior lateral line nerve axons',
 'myelination of lateral line nerve axons',
 'myelination of posterior lateral line nerve axons',
 'negative regulation of myelination',
 'paranodal junction assembly',
 'peripheral nervous system myelin formation',
 'peripheral nervous system myelin maintenance',
 'positive regulation of myelination',
 'regulation of myelination']

In [15]:
# Enumerate every simple path from "starch binding" up to the
# "molecular_function" term and print each one as a chain of term names.
start_id = name_to_id['starch binding']
end_id = name_to_id['molecular_function']
paths = networkx.all_simple_paths(graph, source=start_id, target=end_id)
for path in paths:
    term_names = [id_to_name[node] for node in path]
    print('•', ' ⟶ '.join(term_names))

• starch binding ⟶ polysaccharide binding ⟶ carbohydrate binding ⟶ binding ⟶ molecular_function


In [16]:
# Graph-level attribute dict; in the recorded output it holds ontology-wide
# metadata such as 'typedefs', 'format-version' and 'data-version'.
graph.graph

{'name': 'go',
 'typedefs': [{'id': 'negatively_regulates',
   'name': 'negatively regulates',
   'namespace': 'external',
   'xref': ['RO:0002212'],
   'is_a': ['regulates']},
  {'id': 'never_in_taxon',
   'name': 'never_in_taxon',
   'namespace': 'external',
   'xref': ['RO:0002161'],
   'is_metadata_tag': 'true',
   'is_class_level': 'true'},
  {'id': 'part_of',
   'name': 'part of',
   'namespace': 'external',
   'xref': ['BFO:0000050'],
   'is_transitive': 'true'},
  {'id': 'positively_regulates',
   'name': 'positively regulates',
   'namespace': 'external',
   'xref': ['RO:0002213'],
   'holds_over_chain': ['negatively_regulates negatively_regulates'],
   'is_a': ['regulates']},
  {'id': 'regulates',
   'name': 'regulates',
   'namespace': 'external',
   'xref': ['RO:0002211'],
   'is_transitive': 'true'}],
 'instances': [],
 'format-version': '1.2',
 'data-version': 'releases/2019-07-01',
 'subsetdef': ['gocheck_do_not_annotate "Term not to be used for direct annotation"',
  'g