In [17]:
import json

f = open('/home/doruk/Storage/bioComputing/go/go.json', 'r')
go = json.load(f)

In [18]:
def namespace(uri:str) -> str:
    nsdict = {
        'http://purl.obolibrary.org/obo/go.owl#': 'go_owl',
        'http://purl.obolibrary.org/obo/GO_': 'GO'
    }
    for k,v in nsdict.items():
        if uri.startswith(k):
            return f'{v}:{uri.replace(k,"")}'
    return uri

In [19]:
import pandas as pd 

nodeList = []
for i in go['graphs'][0]['nodes']:
    n = {}
    if i['type'] == 'CLASS':
        n['id'] = namespace(i['id'])
        if i['meta']['definition'] is not None:
            n['definition'] = i['meta']['definition']['val']
        nodeList.append(n)
nodes = pd.DataFrame(nodeList)
nodes

Unnamed: 0,id,definition
0,GO:0000001,"The distribution of mitochondria, including th..."
1,GO:0000002,The maintenance of the structure and integrity...
2,GO:0000003,The production of new individuals that contain...
3,GO:0000004,
4,GO:0000005,OBSOLETE. Assists in the correct assembly of r...
...,...,...
49826,GO:2001313,The chemical reactions and pathways involving ...
49827,GO:2001314,The chemical reactions and pathways resulting ...
49828,GO:2001315,The chemical reactions and pathways resulting ...
49829,GO:2001316,The chemical reactions and pathways involving ...


In [20]:
trplist = []
for i in go['graphs'][0]['edges']:
    t = {}
    t['s'] = namespace(i['sub'])
    t['p'] = namespace(i['pred'])
    t['o'] = namespace(i['obj'])
    trplist.append(t)
tripleStore = pd.DataFrame(trplist)
tripleStore

Unnamed: 0,s,p,o
0,GO:0000001,is_a,GO:0048308
1,GO:0000001,is_a,GO:0048311
2,GO:0000002,is_a,GO:0007005
3,GO:0000003,is_a,GO:0008150
4,GO:0000006,is_a,GO:0005385
...,...,...,...
95738,GO:2001317,is_a,GO:1901362
95739,GO:2001317,is_a,GO:2001316
95740,go_owl:happens_during,is_a,go_owl:ends_during
95741,go_owl:negatively_regulates,is_a,go_owl:regulates


In [21]:
tripleStore.to_csv('edges.csv', sep='\t')
nodes.to_csv('nodes.csv', sep='\t')

In [39]:
subtrpls = []

def traverseUp(triples:pd.DataFrame, noil:list):
    for noi in noil:
        #print(noi)
        subjdf = triples[triples['s'] == noi]
        #print(subjdf)
        for i,r in subjdf.iterrows():
            t = {
                    's': r['s'], 
                    'p': r['p'],
                    'o': r['o']
                }
            if findspo(t) == -1:
                subtrpls.append(t)
        traverseUp(triples=triples, noil=subjdf['o'].tolist())

def traverseDown(edgedf:pd.DataFrame, noil:list):
    for noi in noil:
        objdf = edgedf[edgedf['o'] == noi]
        for i,r in objdf.iterrows():
            t = {
                    's': r['s'], 
                    'p': r['p'],
                    'o': r['o']
                }
            if findspo(t) == -1:
                subtrpls.append(t)
        traverseDown(edgedf=edgedf, noil=objdf['s'].tolist())

def findspo(spo:dict) -> int:
    for i in range(len(subtrpls)):
        if spo == subtrpls[i]:
            # print(i)
            return i
    return -1

In [40]:
godf = pd.read_csv('go_analysis.csv', sep='\t', index_col=0)
goList = godf['goid'].tolist()
goList

['GO:0000287',
 ' GO:0051287',
 'GO:0003723',
 ' GO:0003700',
 ' GO:0045893',
 'GO:0000049',
 ' GO:0046677',
 ' GO:0007049',
 ' GO:0008360',
 ' GO:0009252',
 ' GO:0030145',
 ' GO:0051301',
 ' GO:0071555',
 ' GO:0003677',
 ' GO:0016887',
 'GO:0005829',
 ' GO:0019843',
 ' GO:0005887',
 ' GO:0046872',
 'GO:0005506',
 ' GO:0051539',
 ' GO:0006260',
 ' GO:0032991',
 ' GO:0032153',
 ' GO:0043093',
 ' GO:0043190',
 ' GO:0004803',
 ' GO:0006313',
 ' GO:0005975',
 ' GO:0022857',
 ' GO:0006281',
 'GO:0005887',
 'GO:0003700',
 ' GO:0006351',
 ' GO:0006355',
 ' GO:0006099',
 ' GO:0010181',
 ' GO:0042597',
 ' GO:0009061',
 ' GO:0051537',
 ' GO:0071949',
 ' GO:0005525',
 ' GO:0006865',
 ' GO:0043565',
 ' GO:0009401',
 ' GO:0061077',
 ' GO:0015031',
 ' GO:0003735',
 ' GO:0006412',
 ' GO:0022627',
 ' GO:0055085',
 ' GO:0043709',
 ' GO:0016301',
 'GO:0006974',
 'GO:0009279',
 ' GO:0050661',
 ' GO:0006508',
 ' GO:0043022',
 ' GO:0051289',
 ' GO:0016791',
 ' GO:0015074',
 'GO:0000156',
 ' GO:0001216',
 '

In [41]:
traverseUp(tripleStore, goList[:3])
traverseDown(tripleStore, goList[:3])
subdf = pd.DataFrame(subtrpls)
subdf

Unnamed: 0,s,p,o
0,GO:0000287,is_a,GO:0046872
1,GO:0046872,is_a,GO:0043169
2,GO:0043169,is_a,GO:0043167
3,GO:0043167,is_a,GO:0005488
4,GO:0005488,is_a,GO:0003674
...,...,...,...
236,GO:1905215,is_a,GO:1905214
237,GO:1905216,is_a,GO:1905214
238,GO:1990968,is_a,GO:1905214
239,GO:1904572,is_a,GO:1905215


In [43]:
import pygraphviz as pgv

g = pgv.AGraph(directed=True)
for i,r in subdf.iterrows():
    s = str(r['s'])
    p = str(r['p'])
    o = str(r['o'])
    color = 'black'
    if s not in g:
        if s in goList:
            color = 'red'
        g.add_node(s, label=s, color=color)
    if o not in g:
        if s in goList:
            color = 'red'
        g.add_node(o, label=o, color=color)
    g.add_edge(s, o, label=p)
g.layout(prog='dot')
g.draw('go3.png')