## NaPDI machine reading knowledge graph instance closure

Closure run for SemRep and REACH predications in merged machine reading graph.

In [1]:
## Set up the CLIPS environment
import clips
env = clips.Environment()

MIN_PREDICATION_BELIEF = 0
MIN_TRANSITIVE_BELIEF = 0  # chosen because it retains most depth 1 transitive inferences over semmed  

## NOTE: BE SURE TO CLEAR test-inference.ntriples EACH TIME BEFORE RUNNING!!
## This accomplishes that
f = open("closure_output/test-inference.ntriples",'w')
f.close()

In [2]:
import pandas as pd
import numpy as np

In [4]:
#load merged machine reading graph - ntriples/gpickle - already mapped and processed - LOAD TSVs instead
##load TSVs after mapping for all machine reading output
mr_reach = 'reach_data/reach_all_predicates_mapped_processed_new_20230423.tsv'
mr_semrep = 'semrep_data/semrep_all_predicates_mapped_processed_new_20230423.tsv'

In [5]:
dfr = pd.read_csv(mr_reach, sep='\t')
dfr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47125 entries, 0 to 47124
Data columns (total 19 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   related_common_name   47125 non-null  object 
 1   subject_cui           44264 non-null  object 
 2   subject_name          44264 non-null  object 
 3   subject_source        47125 non-null  object 
 4   predicate             47125 non-null  object 
 5   object_source         47125 non-null  object 
 6   object_cui            44729 non-null  object 
 7   object_name           44729 non-null  object 
 8   subj_reach_grounding  47125 non-null  object 
 9   obj_reach_grounding   47125 non-null  object 
 10  pmid                  47125 non-null  int64  
 11  year                  47125 non-null  object 
 12  belief                47125 non-null  float64
 13  sentence              47125 non-null  object 
 14  pub_type              47125 non-null  object 
 15  source_section     

  interactivity=interactivity, compiler=compiler, result=result)


In [6]:
dfsem = pd.read_csv(mr_semrep, sep='\t')
dfsem.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54348 entries, 0 to 54347
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   index                54348 non-null  int64 
 1   pmid                 54348 non-null  int64 
 2   subject_cui          54348 non-null  object
 3   subject_name         54348 non-null  object
 4   subject_type         54348 non-null  object
 5   object_cui           54348 non-null  object
 6   object_name          54348 non-null  object
 7   object_type          54348 non-null  object
 8   year                 54348 non-null  int64 
 9   sentence             54348 non-null  object
 10  source_section       24088 non-null  object
 11  pub_type             54348 non-null  object
 12  related_common_name  54348 non-null  object
 13  predicate            54348 non-null  object
 14  predicate_obo        54348 non-null  object
 15  subject_obo          54348 non-null  object
 16  obje

In [7]:
dfr = dfr[['pmid', 'subject_cui', 'subject_name', 'object_cui', 'object_name', 'year', 'predicate', 'sentence', 'predicate_obo', 'subject_obo', 'object_obo', 'belief']]

In [8]:
dfsem = dfsem[['pmid', 'subject_cui', 'subject_name', 'object_cui',  'object_name', 'year', 'predicate', 'sentence', 'predicate_obo', 'subject_obo', 'object_obo']]

In [9]:
##all dataframes should have the same columns
#add belief = 0.8 to semrep predications
dfsem['belief'] = 0.8

In [10]:
df = pd.concat([dfr, dfsem])
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 101473 entries, 0 to 54347
Data columns (total 12 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   pmid           101473 non-null  int64  
 1   subject_cui    98612 non-null   object 
 2   subject_name   98612 non-null   object 
 3   object_cui     99077 non-null   object 
 4   object_name    99077 non-null   object 
 5   year           101473 non-null  object 
 6   predicate      101473 non-null  object 
 7   sentence       101473 non-null  object 
 8   predicate_obo  101473 non-null  object 
 9   subject_obo    101473 non-null  object 
 10  object_obo     101473 non-null  object 
 11  belief         101473 non-null  float64
dtypes: float64(1), int64(1), object(10)
memory usage: 10.1+ MB


In [11]:
df = df.drop_duplicates()
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 96441 entries, 0 to 54347
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   pmid           96441 non-null  int64  
 1   subject_cui    93662 non-null  object 
 2   subject_name   93662 non-null  object 
 3   object_cui     94167 non-null  object 
 4   object_name    94167 non-null  object 
 5   year           96441 non-null  object 
 6   predicate      96441 non-null  object 
 7   sentence       96441 non-null  object 
 8   predicate_obo  96441 non-null  object 
 9   subject_obo    96441 non-null  object 
 10  object_obo     96441 non-null  object 
 11  belief         96441 non-null  float64
dtypes: float64(1), int64(1), object(10)
memory usage: 9.6+ MB


In [12]:
df = df.reset_index(drop=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96441 entries, 0 to 96440
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   pmid           96441 non-null  int64  
 1   subject_cui    93662 non-null  object 
 2   subject_name   93662 non-null  object 
 3   object_cui     94167 non-null  object 
 4   object_name    94167 non-null  object 
 5   year           96441 non-null  object 
 6   predicate      96441 non-null  object 
 7   sentence       96441 non-null  object 
 8   predicate_obo  96441 non-null  object 
 9   subject_obo    96441 non-null  object 
 10  object_obo     96441 non-null  object 
 11  belief         96441 non-null  float64
dtypes: float64(1), int64(1), object(10)
memory usage: 8.8+ MB


In [13]:
df['predicate'].value_counts()

Activation                21145
Inhibition                15811
interacts_with             9181
part_of                    8980
coexists_with              5328
affects                    5220
stimulates                 4900
inhibits                   4764
IncreaseAmount             3732
treats                     2907
DecreaseAmount             2721
disrupts                   2594
causes                     2405
augments                   2081
associated_with            1059
produces                    777
Phosphorylation             716
Hydroxylation               426
prevents                    414
Dephosphorylation           282
predisposes                 260
Dehydroxylation             153
treats(infer)               138
Demethylation               137
Methylation                  94
precedes                     54
Acetylation                  40
Glycosylation                40
Deacetylation                20
Ubiquitination               16
interacts_with(infer)        11
associat

In [14]:
df['predicate_obo'].value_counts()

http://purl.obolibrary.org/obo/RO_0002448     21145
http://purl.obolibrary.org/obo/RO_0002449     20575
http://purl.obolibrary.org/obo/RO_0002434      9192
http://purl.obolibrary.org/obo/BFO_0000050     8980
http://purl.obolibrary.org/obo/RO_0002490      5328
http://purl.obolibrary.org/obo/RO_0002596      5220
http://purl.obolibrary.org/obo/RO_0002213      4900
http://purl.obolibrary.org/obo/RO_0011009      3732
http://purl.obolibrary.org/obo/RO_0002606      3045
http://purl.obolibrary.org/obo/RO_0011010      2721
http://purl.obolibrary.org/obo/RO_0002212      2594
http://purl.obolibrary.org/obo/RO_0002566      2405
http://purl.obolibrary.org/obo/RO_0002598      2081
http://purl.obolibrary.org/obo/RO_0002610      1070
http://purl.obolibrary.org/obo/RO_0003000       777
http://purl.obolibrary.org/obo/RO_0002447       716
http://purl.obolibrary.org/obo/GO_0018126       426
http://purl.obolibrary.org/obo/RO_0002599       414
http://purl.obolibrary.org/obo/GO_0006470       282
http://purl.

### Rules

In [15]:
## Example
##Transitive predicates - part of, precedes
##Symmetric predicates - interacts_with, moleculary_interacts_with
###NPKG version 2: removed 'positively regulates' from transitive predicates
env.clear()
env.reset()

env.eval('(open "closure_output/test-inference.ntriples" writeFile "a")')

env.build("""
(deftemplate oav
 (slot object)
 (slot attribute)
 (slot value)
 (slot predNS)
 (slot inferred (default No))
 (slot belief (default 0.0)))
""")

## Transitive rule 
env.build("""
(defrule transitive
  "a simple transitivity rule"
  (oav (object ?o)
       (attribute ?pred&:(member$ ?pred (create$ http://purl.obolibrary.org/obo/BFO_0000063 http://purl.obolibrary.org/obo/BFO_0000050)))
       (value ?s)
       (predNS RO)
       (inferred No)
       (belief ?b1))
  (oav (object ?s)
       (attribute ?pred)
       (value ?q)
       (predNS RO)
       (inferred No)
       (belief ?b2))
   (test (>= (* ?b1 ?b2) {}))
  =>
  (assert (oav (object ?o)
               (attribute ?pred)
               (value ?q)
               (inferred Yes)
               (predNS RO)
               (belief (* ?b1 ?b2))))
  
  (printout writeFile (format nil "<%s><%s><%s>.%n" ?o ?pred ?q))   
)
""".format(MIN_TRANSITIVE_BELIEF))
# NOTE: add this line to RHS to see the belief scores:
# (printout writeFile (format nil "b1: %f, b2: %f, belief: %f>.%n" ?b1 ?b2 (* ?b1 ?b2))) 

## simplerule for symmetric relationships 
env.build("""
(defrule symmetric
  "a simple symmetry rule"
  (oav (object ?o)
       (attribute ?pred&:(member$ ?pred (create$ http://purl.obolibrary.org/obo/RO_0002434 http://purl.obolibrary.org/obo/RO_0002436)))
       (value ?s)
       (predNS RO)
       (inferred No)
       (belief ?b))
  =>
  (assert (oav (object ?s)
               (attribute ?pred)
               (value ?o)
               (inferred Yes)
               (predNS RO)
               (belief ?b)))
  
  (printout writeFile (format nil "<%s><%s><%s>.%n" ?o ?pred ?s))  
)
""")


In [16]:
predMapD = {
    'regulateactivity':'RO_0011002',
    'regulateamount':'RO_0011003',
    'phosphorylation':'RO_0002447',
    'dephosphorylation':'GO_0006470',
    'ubiquitination':'RO_0002480',
    'deubiquitination':'GO_0016579',
    'sumoylation':'RO_0002436',
    'desumoylation':'RO_0002436',
    'hydroxylation':'GO_0018126',
    'dehydroxylation':'RO_0002436',
    'acetylation':'GO_0006473',
    'deacetylation':'GO_0006476',
    'glycosylation':'GO_0006486',
    'deglycosylation':'GO_0006517',
    'farnesylation':'RO_0002436',
    'defarnesylation':'RO_0002436',
    'geranylgeranylation':'RO_0002436',
    'degeranylgeranylation':'RO_0002436',
    'palmitoylation':'RO_0002436',
    'depalmitoylation':'RO_0002436',
    'myristoylation':'RO_0002436',
    'demyristoylation':'RO_0002436',
    'ribosylation':'RO_0002436',
    'deribosylation':'RO_0002436',
    'methylation':'GO_0006479',
    'demethylation':'GO_0006482',
    'activation':'RO_0002448',
    'inhibition':'RO_0002449',
    'increaseamount':'RO_0011009',
    'decreaseamount':'RO_0011010',
    'affects': 'RO_0002596',
    'associated_with': 'RO_0002610',
    'augments': 'RO_0002598',
    'causes': 'RO_0002566',
    'coexists_with': 'RO_0002490',
    'complicates': 'RO_0003309',
    'disrupts': 'RO_0002212',
    'inhibits': 'RO_0002449',
    'interacts_with': 'RO_0002434',
    'part_of': 'BFO_0000050',
    'precedes': 'BFO_0000063',
    'predisposes': 'RO_0003302',
    'prevents': 'RO_0002599',
    'produces': 'RO_0003000',
    'stimulates': 'RO_0002213',
    'treats': 'RO_0002606'
}


### Facts 

In [17]:
originalTriplesF = open('closure_output/original-triples.ntriples','w')

resourceD = {}
resourceDinv = {}
rcnt = 0
fctStrD = {}
#semTypeD = {}
labelsD = {}

for i in range(len(df.index)): 
   
    belief = df.at[i, 'belief']
    if belief < MIN_PREDICATION_BELIEF:
        continue
    
    (subj_obo, pred_obo, obj_obo) = (df.at[i, 'subject_obo'],
                                    df.at[i, 'predicate_obo'],
                                    df.at[i, 'object_obo'])
    (subj, pred, obj) = (df.at[i,'subject_name'],
                        df.at[i,'predicate'].lower().strip(),
                        df.at[i, 'object_name'])

    # only write out and/or do inference over some predicates
    if pred not in predMapD:        
        continue
        
    # write the original triple to file, regardless of the predicate
    originalTriplesF.write("<{}><{}><{}>.\n".format(subj_obo,pred_obo,obj_obo))
        
    # Track the subject and object names
    subjName = subj
    objName = obj
    
    
    '''# Track the semantic types
    semTypesStr = stmtSplt[SUBJECT_TYPE]
    if semTypesStr.find('[') == -1:
        if not semTypeD.get(subj):
            semTypeD[subj] = [semTypesStr.strip()]
        else:
            semTypeD[subj] = semTypeD[subj].append(semTypesStr.strip())
    else:
        semTypesStr = semTypesStr.replace("'",'').replace('[','').replace(']','')
        semTypesL = [x.strip() for x in semTypesStr.split(',')]
        if not semTypeD.get(subj):
            semTypeD[subj] = semTypesL
        else:
            semTypeD[subj] = semTypeD[subj] + semTypesL
            
    semTypesStr = stmtSplt[OBJECT_TYPE]
    if semTypesStr.find('[') == -1:
        if not semTypeD.get(obj):
            semTypeD[obj] = [semTypesStr.strip()]
        else:
            semTypeD[obj] = semTypeD[obj].append(semTypesStr.strip())
    else:
        semTypesStr = semTypesStr.replace("'",'').replace('[','').replace(']','')
        semTypesL = [x.strip() for x in semTypesStr.split(',')]
        if not semTypeD.get(obj):
            semTypeD[obj] = semTypesL
        else:
            semTypeD[obj] = semTypeD[obj] + semTypesL'''
        
    if not resourceD.get(subj_obo):
        resourceD[subj_obo] = 'r{}'.format(rcnt)
        resourceDinv['r{}'.format(rcnt)] = subj_obo
        rcnt += 1
    
    if not resourceD.get(obj_obo):
        resourceD[obj_obo] = 'r{}'.format(rcnt)
        resourceDinv['r{}'.format(rcnt)] = obj_obo
        rcnt += 1    
    
        
    fctStr = """
(oav (object {})
     (attribute {})
     (value {})
     (predNS {})
     (belief {})
)""".format(resourceD[subj_obo], pred_obo, resourceD[obj_obo], 'RO', belief)
    
    if not fctStrD.get(fctStr): 
        env.assert_string(fctStr)
        fctStrD[fctStr] = 1
    
    
    # write the human readable labels as triples
    if isinstance(subj,str):
        originalTriplesF.write('<{}><http://www.w3.org/2000/01/rdf-schema#label> "{}".\n'.format(subj_obo,subj.replace('"','')))
    if isinstance(obj,str):
        originalTriplesF.write('<{}><http://www.w3.org/2000/01/rdf-schema#label> "{}".\n'.format(obj_obo,obj.replace('"','')))

'''# Write the semantic types as triples
for (e,st) in semTypeD.items():
    if st:
        stSet = set(st)
        for elt in stSet:
            originalTriplesF.write("<http://dikb.org/ad#{}><http://www.w3.org/1999/02/22-rdf-syntax-ns#type><http://umls.org/st/#{}>.\n".format(e,elt.replace('"','')))
'''
originalTriplesF.close()

In [18]:
i = 0
for fact in env.facts():
    print(fact)
    if i == 20:
        break
    i += 1

(oav (object r0) (attribute http://purl.obolibrary.org/obo/RO_0002448) (value r1) (predNS RO) (inferred No) (belief 0.65))
(oav (object r2) (attribute http://purl.obolibrary.org/obo/RO_0002448) (value r3) (predNS RO) (inferred No) (belief 0.65))
(oav (object r2) (attribute http://purl.obolibrary.org/obo/RO_0002448) (value r4) (predNS RO) (inferred No) (belief 0.65))
(oav (object r5) (attribute http://purl.obolibrary.org/obo/RO_0002448) (value r6) (predNS RO) (inferred No) (belief 0.65))
(oav (object r7) (attribute http://purl.obolibrary.org/obo/RO_0002448) (value r7) (predNS RO) (inferred No) (belief 0.65))
(oav (object r8) (attribute http://purl.obolibrary.org/obo/RO_0002448) (value r9) (predNS RO) (inferred No) (belief 0.65))
(oav (object r10) (attribute http://purl.obolibrary.org/obo/RO_0002448) (value r11) (predNS RO) (inferred No) (belief 0.65))
(oav (object r12) (attribute http://purl.obolibrary.org/obo/RO_0002448) (value r9) (predNS RO) (inferred No) (belief 0.65))
(oav (object 

In [19]:
env.run()

21693

The output tells use how many RHS made changes to working memory

In [20]:
for k in resourceD:
    print(k,resourceD[k])
    break

http://purl.obolibrary.org/obo/PR_P10635 r0


In [21]:
for k in resourceDinv:
    print(k,resourceDinv[k])
    break

r0 http://purl.obolibrary.org/obo/PR_P10635


In [22]:
import re

f = open('closure_output/test-inference.ntriples','r')
buf = f.read()
f.close()
rsL = buf.split('\n')

'''rgx = re.compile('(http://purl.obolibrary.org/obo/)([A-Z_0-9]+)')
for i in range(0,len(rsL)):
    if rsL[i] == "":
        continue
        
    ml = rgx.findall(rsL[i])
    if len(ml) != 1:
        print('ERROR: could not match on predicate regex: {}'.format(rsL[i]))
        continue
        
    (uri,predicate) = ml[0]
    rsL[i] = rsL[i].replace(predicate, predMapD[predicate])'''
    

f = open('closure_output/inferred-transitive-and-symmetric.ntriples','w')
rgx = re.compile('(r[0-9]+)')
for it in rsL:
    keyL = rgx.findall(it)
    newTr = it
    for k in keyL:
        if resourceDinv.get(k):
            newTr = newTr.replace(k, resourceDinv[k])            
        else:
            print('ERROR: key not found in resourceDinv: {}'.format(k))
    f.write(newTr + '\n')
f.close()


### Save as gpickle file with metadata

In [23]:
import pandas as pd
import numpy as np
import pickle
from rdflib.namespace import OWL, RDF, RDFS
import os
import pickle, json

#Create networkx graph from triples
import glob
import hashlib
import json
import networkx as nx  # type: ignore
import os
import os.path

from collections import Counter  # type: ignore
from more_itertools import unique_everseen  # type: ignore
from rdflib import BNode, Graph, Literal, Namespace, URIRef  # type: ignore
from rdflib.namespace import OWL, RDF, RDFS  # type: ignore
from rdflib.plugins.serializers.nt import _quoteLiteral  # type: ignore
import subprocess

from tqdm import tqdm  # type: ignore
from typing import Dict, List, Optional, Set, Tuple, Union

In [24]:
with open('closure_output/inferred-transitive-and-symmetric.ntriples','r') as file1:
    g = file1.read()
graph1 = g.split('\n')

In [25]:
obo = Namespace('http://purl.obolibrary.org/obo/')
oboinowl = Namespace('http://www.geneontology.org/formats/oboInOwl#')
schema = Namespace('http://www.w3.org/2001/XMLSchema#')
napdi = Namespace('http://napdi.org/napdi_srs_imports:')

In [26]:
def n3(node: Union[URIRef, BNode, Literal]) -> str:
    """Method takes an RDFLib node of type BNode, URIRef, or Literal and serializes it to meet the RDF 1.1 NTriples
    format.
    Src: https://github.com/RDFLib/rdflib/blob/c11f7b503b50b7c3cdeec0f36261fa09b0615380/rdflib/plugins/serializers/nt.py
    Args:
        node: An RDFLib
    Returns:
        serialized_node: A string containing the serialized
    """
    if isinstance(node, Literal): serialized_node = "%s" % _quoteLiteral(node)
    else: serialized_node = "%s" % node.n3()
    return serialized_node

In [27]:
graph1[0]

'<http://purl.obolibrary.org/obo/GO_0019814><http://purl.obolibrary.org/obo/RO_0002434><http://purl.obolibrary.org/obo/CHEBI_50904>.'

In [28]:
import re

In [None]:
x = '<http://napdi.org/napdi_srs_imports:epigallocatechin_gallate><http://purl.obolibrary.org/obo/RO_0002213><http://purl.obolibrary.org/obo/CHEBI_27843>.'
rgx = re.compile('(http://[a-zA-Z0-9/._:]+)')
rgx.findall(x)

In [29]:
#convert rdflib graph to multidigraph - code borrowed from PheKnowLator: kg_utils.py
#use the pred key to also create a dictionary with metadata about the edge - add INF tag for inferred triples
errors = []
nx_mdg = nx.MultiDiGraph()
for triple in graph1:
    #do not save label predicate to gpickle

    rgx = re.compile('(http://[a-zA-Z0-9/._:]+)')
    match = rgx.findall(triple)
    if len(match)!=3:
        errors.append(triple)
        continue
    subj = match[0]
    obj = match[2]
    pred = match[1]
    s = URIRef(subj)
    p = URIRef(pred)
    o = URIRef(obj)

    pred_key = hashlib.md5('{}{}{}'.format(n3(s), n3(p), n3(o)).encode()).hexdigest()
    nx_mdg.add_node(s, key=n3(s))
    nx_mdg.add_node(o, key=n3(o))
    nx_mdg.add_edge(s, o, **{'key': p, 'predicate_key': pred_key, 'weight':0.0,
                             'source_graph': 'machine_read_INF'})

###add more metadata to the edges

nx.write_gpickle(nx_mdg, "closure_output/machineread_inferred_symmetric_transitive.gpickle")

In [30]:
#this should have less edges than rdflib graph after removing 'labels'
nodes = nx.number_of_nodes(nx_mdg)
edges = nx.number_of_edges(nx_mdg)
density = nx.density(nx_mdg)
avg_deg = float(edges)/nodes
print(nodes, edges, density, avg_deg)

2174 18450 0.003905504157192203 8.486660533578657


In [31]:
len(errors)

2

In [32]:
errors

['<http://purl.obolibrary.org/obo/CHEBI_47519><http://purl.obolibrary.org/ob',
 '']

In [33]:
##get stats of inferred triples
with open('closure_output/inferred-transitive-and-symmetric.ntriples') as filei:
    g = filei.read()
graph1 = g.split('\n')
len(graph1)

21633

In [34]:
graph1[0]

'<http://purl.obolibrary.org/obo/GO_0019814><http://purl.obolibrary.org/obo/RO_0002434><http://purl.obolibrary.org/obo/CHEBI_50904>.'

In [35]:
s = []
p = []
o = []
for triple in graph1:
    rgx = re.compile('(http://[a-zA-Z0-9/._:]+)')
    match = rgx.findall(triple)
    if len(match)!=3:
        errors.append(triple)
        continue
    subj = match[0]
    obj = match[2]
    pred = match[1]
    s.append(subj)
    p.append(pred)
    o.append(obj)

In [36]:
dfinf = pd.DataFrame()
dfinf['subject_obo'] = s
dfinf['pred_obo'] = p
dfinf['object_obo'] = o
dfinf.head()

Unnamed: 0,subject_obo,pred_obo,object_obo
0,http://purl.obolibrary.org/obo/GO_0019814,http://purl.obolibrary.org/obo/RO_0002434,http://purl.obolibrary.org/obo/CHEBI_50904
1,http://purl.obolibrary.org/obo/CHEBI_29014,http://purl.obolibrary.org/obo/RO_0002434,http://purl.obolibrary.org/obo/PR_P10635
2,http://purl.obolibrary.org/obo/CHEBI_29014,http://purl.obolibrary.org/obo/RO_0002434,http://purl.obolibrary.org/obo/CHEBI_4470
3,http://purl.obolibrary.org/obo/CHEBI_22586,http://purl.obolibrary.org/obo/RO_0002434,http://purl.obolibrary.org/obo/CHEBI_26519
4,http://purl.obolibrary.org/obo/CHEBI_17858,http://purl.obolibrary.org/obo/RO_0002434,http://purl.obolibrary.org/obo/CHEBI_25523


In [37]:
dfinf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21631 entries, 0 to 21630
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   subject_obo  21631 non-null  object
 1   pred_obo     21631 non-null  object
 2   object_obo   21631 non-null  object
dtypes: object(3)
memory usage: 507.1+ KB


In [38]:
dfinf['pred_obo'].value_counts()

http://purl.obolibrary.org/obo/BFO_0000050    14807
http://purl.obolibrary.org/obo/RO_0002434      6704
http://purl.obolibrary.org/obo/RO_0002436        96
http://purl.obolibrary.org/obo/BFO_0000063       24
Name: pred_obo, dtype: int64

In [None]:
##Update April 2023
14807 - part of
6704 - interacts with
96 - molecularly interacts with
24 - precedes