Build a graph from TO using D3

In [1]:
# import libraries (including our short d3_lib script)
from IPython.core.display import HTML
import d3_lib, random
import re
import json

### get all the TO classes and the sublcassOf relations

In [2]:
%%script bash
arq --results JSON --data https://raw.githubusercontent.com/Planteome/plant-trait-ontology/master/plant-trait-ontology.obo.owl '
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>  
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>  
PREFIX owl: <http://www.w3.org/2002/07/owl#> 
PREFIX oio: <http://www.geneontology.org/formats/oboInOwl#>  
PREFIX obo: <http://purl.obolibrary.org/obo/>  

#CONSTRUCT 
#{
#    ?x rdfs:subClassOf ?o .
#}
SELECT ?x ?xlabel ?o ?olabel
WHERE
{
    ?x rdfs:subClassOf ?o .
    filter(!isblank(?o)) 
    filter(!regex(str(?o), "PATO"))
    filter(regex(str(?o), "TO"))
    ?x rdfs:label ?xlabel . 
    ?o rdfs:label ?olabel .
}' > to.json

### get for each TO class its level in the hierarchy

will be used to customise the graph

In [3]:
%%script bash
arq --results JSON --data https://raw.githubusercontent.com/Planteome/plant-trait-ontology/master/plant-trait-ontology.obo.owl '
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>  
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>  
PREFIX owl: <http://www.w3.org/2002/07/owl#> 
PREFIX oio: <http://www.geneontology.org/formats/oboInOwl#>  
PREFIX obo: <http://purl.obolibrary.org/obo/>  

select ?super (count(?mid) as ?distance) { 
  ?super rdfs:subClassOf* ?mid .
  ?mid rdfs:subClassOf+ <http://purl.obolibrary.org/obo/TO_0000387> .
}
group by ?super ?sub 
order by ?super ?sub' > level.json

### get for each TO class its type

In [4]:
%%script bash
arq --results JSON --data https://raw.githubusercontent.com/Planteome/plant-trait-ontology/master/plant-trait-ontology.obo.owl '
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>  
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>  
PREFIX owl: <http://www.w3.org/2002/07/owl#> 
PREFIX oio: <http://www.geneontology.org/formats/oboInOwl#>  
PREFIX obo: <http://purl.obolibrary.org/obo/>  

select ?sub ?mid { 
  ?sub rdfs:subClassOf* ?mid .
  ?mid rdfs:subClassOf <http://purl.obolibrary.org/obo/TO_0000387> .
}
group by ?mid ?sub 
' > type.json

### create the JSON file used by d3js

In [13]:
with open('to.json') as data_file:    
    data = json.load(data_file)
    
with open('level.json') as level_file:    
    level = json.load(level_file)
    
with open('type.json') as type_file:    
    type = json.load(type_file) 
    
    list = data["results"]["bindings"]
    
    graph = {"nodes": [], "links": []}
    
    for entry in list:
        x = 0.1
        o = 0.1
        #check if the nodes already exist in the graph
        for e in graph["nodes"]:
            if e["name"] == entry["xlabel"]["value"]:
                x=graph["nodes"].index(e)
            if e["name"] == entry["olabel"]["value"]:
                o=graph["nodes"].index(e)
        # if the term does not exist, create it        
        if x==0.1:
            for l in level["results"]["bindings"]:
                if l["super"]["value"] == entry["x"]["value"]:
                    graph["nodes"].append( {"name": entry["xlabel"]["value"], "value": 1/int(l["distance"]["value"]), "id":entry["x"]["value"] } )
                    x=len(graph["nodes"])-1
                    #1/int(l["distance"]["value"])
                    
        if o==0.1:
                #for lu in level["results"]["bindings"]:
                 #   if lu["super"]["value"] == entry["o"]["value"]:
                   #     graph["nodes"].append( {"name": entry["olabel"]["value"], "value": 1/int(lu["distance"]["value"]), "id":entry["o"]["value"]} )
                    #    o=len(graph["nodes"])-1
                  #  else:
                        graph["nodes"].append( {"name": entry["olabel"]["value"], "value": 1, "id":entry["o"]["value"]} )
                        o=len(graph["nodes"])-1

        # create the link
        if  entry["x"]["value"] != entry["o"]["value"]:
            graph["links"].append( {"source": x, "target": o, "value": 10} )
    
    # add the types
    for node in graph["nodes"]:
        for t in type["results"]["bindings"]:
            if t["sub"]["value"] == node["id"]:
                node["group"] = re.findall('\d+', t["mid"]["value"])
        
#data["results"]['bindings'][0]['x']['value']


In [14]:
# visualize as force-directed graph in D3
HTML(d3_lib.set_styles(['force_directed_graph']) + 
'<script src="lib/d3/d3.min.js"></script>' + 
      d3_lib.draw_graph('force_directed_graph', {'data': graph}) )

### Extras
#### Save the graph

In [11]:
with open('data.json', 'w') as outfile:
    json.dump(graph, outfile)

In [15]:
graph

{'links': [{'source': 0, 'target': 0.1, 'value': 10},
  {'source': 1, 'target': 0.1, 'value': 10},
  {'source': 2, 'target': 0.1, 'value': 10},
  {'source': 3, 'target': 0.1, 'value': 10},
  {'source': 4, 'target': 0.1, 'value': 10},
  {'source': 5, 'target': 0.1, 'value': 10},
  {'source': 6, 'target': 0.1, 'value': 10},
  {'source': 7, 'target': 0.1, 'value': 10},
  {'source': 8, 'target': 0.1, 'value': 10},
  {'source': 9, 'target': 0.1, 'value': 10},
  {'source': 10, 'target': 0.1, 'value': 10},
  {'source': 11, 'target': 0.1, 'value': 10},
  {'source': 12, 'target': 0.1, 'value': 10},
  {'source': 13, 'target': 0.1, 'value': 10},
  {'source': 14, 'target': 0.1, 'value': 10},
  {'source': 15, 'target': 0.1, 'value': 10},
  {'source': 16, 'target': 0.1, 'value': 10},
  {'source': 17, 'target': 0.1, 'value': 10},
  {'source': 18, 'target': 0.1, 'value': 10},
  {'source': 19, 'target': 0.1, 'value': 10},
  {'source': 20, 'target': 0.1, 'value': 10},
  {'source': 21, 'target': 0.1, 'va

In [30]:
level

{'head': {'vars': ['super', 'distance']},
 'results': {'bindings': [{'distance': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer',
     'type': 'typed-literal',
     'value': '7'},
    'super': {'type': 'uri',
     'value': 'http://purl.obolibrary.org/obo/TO_0000001'}},
   {'distance': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer',
     'type': 'typed-literal',
     'value': '7'},
    'super': {'type': 'uri',
     'value': 'http://purl.obolibrary.org/obo/TO_0000003'}},
   {'distance': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer',
     'type': 'typed-literal',
     'value': '6'},
    'super': {'type': 'uri',
     'value': 'http://purl.obolibrary.org/obo/TO_0000004'}},
   {'distance': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer',
     'type': 'typed-literal',
     'value': '8'},
    'super': {'type': 'uri',
     'value': 'http://purl.obolibrary.org/obo/TO_0000006'}},
   {'distance': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer',
     'type

In [53]:
with open('to.json') as data_file:    
    data = json.load(data_file)
    
    list = data["results"]["bindings"]
    
    graph = {"nodes": [], "links": []}
    
    for entry in list:
        x = 0.1
        o = 0.1
        #check if the nodes already exist in the graph
        for e in graph["nodes"]:
            if e["name"] == entry["xlabel"]["value"]:
                x=graph["nodes"].index(e)
            if e["name"] == entry["olabel"]["value"]:
                o=graph["nodes"].index(e)
        # if the term does not exist, create it        
        if x==0.1:
            graph["nodes"].append( {"name": entry["xlabel"]["value"], "value": 1, "id":entry["x"]["value"] } )
            x=len(graph["nodes"])-1
        if o==0.1:
            graph["nodes"].append( {"name": entry["olabel"]["value"], "value": 1, "id":entry["o"]["value"]} )
            o=len(graph["nodes"])-1

        # create the link
        if  entry["x"]["value"] != entry["o"]["value"]:
            graph["links"].append( {"source": x, "target": o, "value": 10} )
    