In [1]:
import obonet as obo
import networkx as nx

In [2]:
# Using obonet, read in OBOFoundry URL for ncit.obo
## Reference http://www.obofoundry.org/ for all available OBO ontologies via OBOFoundry
# This will take ~2.5 minutes to complete

url = 'http://purl.obolibrary.org/obo/ncit.obo'
graph = obo.read_obo(url)
len(graph)

162169

In [3]:
# Number of edges
graph.number_of_edges()

235888

In [4]:
# Number of nodes
graph.number_of_nodes()

162169

In [5]:
# Retreive properties of Parp Inhibitor (Poly (ADP-Ribose) Polymerase Inhibitor) -> Parp Inhibitor
graph.nodes['NCIT:C62554']

{'name': 'Poly (ADP-Ribose) Polymerase Inhibitor',
 'def': '"Any substance that inhibits Poly (ADP-Ribose) polymerase, an enzyme involved in detecting DNA single strand breaks and the initiation of DNA repair. Inhibition of Poly (ADP-Ribose) polymerase has direct cytotoxic effects by inhibiting DNA repair and causing cell death." []',
 'subset': ['NCIT:C116977', 'NCIT:C116978'],
 'synonym': ['"PARP Inhibitor" EXACT []',
  '"PARP Inhibitor" EXACT []',
  '"PARP inhibitor" EXACT []',
  '"Poly (ADP-Ribose) Polymerase Inhibitor" EXACT []',
  '"poly (ADP-ribose) polymerase inhibitor" EXACT []',
  '"Poly(ADP-Ribose) Polymerase Inhibitor" EXACT []'],
 'is_a': ['NCIT:C163758', 'NCIT:C471'],
 'relationship': ['NCIT:R146 NCIT:C48215'],
 'property_value': ['NCIT:NHC0 "C62554" xsd:string',
  'NCIT:P106 "Chemical Viewed Functionally" xsd:string',
  'NCIT:P107 "PARP Inhibitor" xsd:string',
  'NCIT:P108 "Poly (ADP-Ribose) Polymerase Inhibitor" xsd:string',
  'NCIT:P207 "C1882413" xsd:string',
  'NCIT:

In [8]:
# Define a id (source code i.e. NCIT:C62554) to preferred label mapping for NCI Thesaurus OBO file 
id_to_name = {id_: data.get('name') for id_, data in graph.nodes(data=True)}
name_to_id = {data['name']: id_ for id_, data in graph.nodes(data=True) if 'name' in data}


In [28]:
# id -> name mapping
print({len(id_to_name)})
id_to_name

{162169}


{'NCIT:C1000': 'Recombinant Amphiregulin',
 'NCIT:C10000': 'Cyclophosphamide/Fluoxymesterone/Mitolactol/Prednisone/Tamoxifen',
 'NCIT:C100000': 'Percutaneous Coronary Intervention for ST Elevation Myocardial Infarction-Stable-Over 12 Hours From Symptom Onset',
 'NCIT:C100001': 'Percutaneous Coronary Intervention for ST Elevation Myocardial Infarction-Stable After Successful Full-Dose Thrombolytic Therapy',
 'NCIT:C100002': 'Percutaneous Coronary Intervention for ST Elevation Myocardial Infarction-Unstable-Over 12 Hours From Symptom Onset',
 'NCIT:C100003': 'Percutaneous Mitral Valve Repair',
 'NCIT:C100004': 'Pericardial Stripping',
 'NCIT:C100005': 'Post-Cardiac Transplant Evaluation',
 'NCIT:C100006': 'Pre-Operative Evaluation for Non-Cardiovascular Surgery',
 'NCIT:C100007': 'Previously Implanted Cardiac Lead',
 'NCIT:C100008': 'Rescue Percutaneous Coronary Intervention for ST Elevation Myocardial Infarction After Failed Full-Dose Thrombolytic Therapy',
 'NCIT:C100009': 'Reused Card

In [29]:
# name -> id mapping
print({len(name_to_id)})
name_to_id

{162031}


{'Recombinant Amphiregulin': 'NCIT:C1000',
 'Cyclophosphamide/Fluoxymesterone/Mitolactol/Prednisone/Tamoxifen': 'NCIT:C10000',
 'Percutaneous Coronary Intervention for ST Elevation Myocardial Infarction-Stable-Over 12 Hours From Symptom Onset': 'NCIT:C100000',
 'Percutaneous Coronary Intervention for ST Elevation Myocardial Infarction-Stable After Successful Full-Dose Thrombolytic Therapy': 'NCIT:C100001',
 'Percutaneous Coronary Intervention for ST Elevation Myocardial Infarction-Unstable-Over 12 Hours From Symptom Onset': 'NCIT:C100002',
 'Percutaneous Mitral Valve Repair': 'NCIT:C100003',
 'Pericardial Stripping': 'NCIT:C100004',
 'Post-Cardiac Transplant Evaluation': 'NCIT:C100005',
 'Pre-Operative Evaluation for Non-Cardiovascular Surgery': 'NCIT:C100006',
 'Previously Implanted Cardiac Lead': 'NCIT:C100007',
 'Rescue Percutaneous Coronary Intervention for ST Elevation Myocardial Infarction After Failed Full-Dose Thrombolytic Therapy': 'NCIT:C100008',
 'Reused Cardiac Lead': 'NCIT

In [30]:
# Get the name for NCIT:C62554 (poly (ADP-Ribose) Polymerase Inhibitor) AKA PARP Inhibitor
parp = id_to_name['NCIT:C62554']
parp

'Poly (ADP-Ribose) Polymerase Inhibitor'

In [32]:
# Find all ancestors (superclasses) of Parp Inhibitor. 
# Networkx ancestors correlates to 'subterms' & networkx descendants correlates to 'superclasses'
# --> A bit counter-intuitive but now we know

# Within our graph let's get all subterms of Parp Inhibitor (NCIT:C62554)
# --> where Node id: is the corresponding id for the name (i.e. see below)
nx.ancestors(graph, 'NCIT:C62554')
for node in nx.ancestors(graph, 'NCIT:C62554'):
    print('Node id: {:15s} --- maps onto --> \t{}'.format(node, id_to_name[node]))

Node id: NCIT:C140553    --- maps onto --> 	Simmiparib
Node id: NCIT:C71721     --- maps onto --> 	Olaparib
Node id: NCIT:C131178    --- maps onto --> 	Rucaparib Camsylate
Node id: NCIT:C95733     --- maps onto --> 	Talazoparib
Node id: NCIT:C156260    --- maps onto --> 	Temozolomide/Veliparib Regimen
Node id: NCIT:C137800    --- maps onto --> 	Rucaparib
Node id: NCIT:C176969    --- maps onto --> 	PARP/Microtubule Polymerization Inhibitor AMXI-5001
Node id: NCIT:C126274    --- maps onto --> 	Fluzoparib
Node id: NCIT:C167278    --- maps onto --> 	PARP Inhibitor NMS-03305293
Node id: NCIT:C160812    --- maps onto --> 	Rucaparib Regimen
Node id: NCIT:C160120    --- maps onto --> 	Talazoparib Regimen
Node id: NCIT:C78186     --- maps onto --> 	Rucaparib Phosphate
Node id: NCIT:C62526     --- maps onto --> 	Iniparib
Node id: NCIT:C168601    --- maps onto --> 	Senaparib
Node id: NCIT:C160097    --- maps onto --> 	Olaparib Regimen
Node id: NCIT:C160807    --- maps onto --> 	Niraparib Regimen


In [126]:
nx.ancestors(graph, 'NCIT:C62554')
for node in nx.ancestors(graph, 'NCIT:C62554'):
    print('Node id: {:25s} --- maps onto --> \t{}'.format(id_to_name[node], id_to_name[node]))

Node id: Simmiparib                --- maps onto --> 	Simmiparib
Node id: Olaparib                  --- maps onto --> 	Olaparib
Node id: Rucaparib Camsylate       --- maps onto --> 	Rucaparib Camsylate
Node id: Talazoparib               --- maps onto --> 	Talazoparib
Node id: Temozolomide/Veliparib Regimen --- maps onto --> 	Temozolomide/Veliparib Regimen
Node id: Rucaparib                 --- maps onto --> 	Rucaparib
Node id: PARP/Microtubule Polymerization Inhibitor AMXI-5001 --- maps onto --> 	PARP/Microtubule Polymerization Inhibitor AMXI-5001
Node id: Fluzoparib                --- maps onto --> 	Fluzoparib
Node id: PARP Inhibitor NMS-03305293 --- maps onto --> 	PARP Inhibitor NMS-03305293
Node id: Rucaparib Regimen         --- maps onto --> 	Rucaparib Regimen
Node id: Talazoparib Regimen       --- maps onto --> 	Talazoparib Regimen
Node id: Rucaparib Phosphate       --- maps onto --> 	Rucaparib Phosphate
Node id: Iniparib                  --- maps onto --> 	Iniparib
Node id: Senap

In [128]:
# Find immediate parent concepts to Poly (ADP-Ribose) Polymerase Inhibitor (Code C62554)
# graph.out_edges() vs graph.in_edges() 
node = name_to_id['Poly (ADP-Ribose) Polymerase Inhibitor']
for child, parent, key in graph.out_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟶ {key} ⟶  {id_to_name[parent]}')


• Poly (ADP-Ribose) Polymerase Inhibitor ⟶ is_a ⟶  Targeted Therapy Agent
• Poly (ADP-Ribose) Polymerase Inhibitor ⟶ is_a ⟶  Enzyme Inhibitor
• Poly (ADP-Ribose) Polymerase Inhibitor ⟶ NCIT:R146 ⟶  Poly [ADP-Ribose] Polymerase 1


In [79]:
# All Drugs within the NCI Thesaurus that are defined to exhibit the 'drug class' of Parp Inhibitor 
# AKA all direct children of the concept NCIT_C62554 (Parp Inhibitor)

node = name_to_id['Poly (ADP-Ribose) Polymerase Inhibitor']
for parent, child, key in graph.in_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟵  {key} ⟵ {id_to_name[parent]}')


• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ PARP/Tankyrase Inhibitor 2X-121
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Pamiparib
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Fluzoparib
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Rucaparib Camsylate
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Niraparib Tosylate Monohydrate
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Rucaparib
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Simmiparib
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Talazoparib Tosylate
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ PARP 1/2 Inhibitor NOV1401
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ PARP Inhibitor NMS-03305293
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Senaparib
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Amelparib
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Atamparib
• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Tankyrase Inhibitor STP1002
• Poly (ADP-Ribose) Polymer

In [94]:
# Olaparib is a newly FDA approved Parp Inhibitor (Ovarian Cancer)
# --> demonstrate the out_edge of the graph (aka immediate parent is Poly (ADP-Ribose) Polymerase Inhibitor)
node = name_to_id['Olaparib']
for parent, child, key in graph.out_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟵  {key} ⟵ {id_to_name[parent]}')

• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Olaparib


In [96]:
# Olaparib is a newly FDA approved Parp Inhibitor (Ovarian Cancer)
# --> demonstrate the contrast of above --> in_edges of the graph illustrates the NCIt semantic relationship NCIT:R123 (where NCIT:R123 == Chemotherapy_Regimen_Has_Component)
# NIH reference for translation --> https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NCI/sourcerepresentation.html

node = name_to_id['Olaparib']
for child, parent, key in graph.in_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟵  {key} ⟵ {id_to_name[parent]}')

• Olaparib Regimen ⟵  NCIT:R123 ⟵ Olaparib
• Bevacizumab/Olaparib Regimen ⟵  NCIT:R123 ⟵ Olaparib


In [102]:
# Explore another Parp Inhibitor (in_edges)
node = name_to_id['Talazoparib']
for child, parent, key in graph.in_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟵  {key} ⟵ {id_to_name[parent]}')

• Talazoparib Regimen ⟵  NCIT:R123 ⟵ Talazoparib


In [103]:
# Explore another Parp Inhibitor (out_edges)
node = name_to_id['Talazoparib']
for parent, child, key in graph.out_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟵  {key} ⟵ {id_to_name[parent]}')

• Poly (ADP-Ribose) Polymerase Inhibitor ⟵  is_a ⟵ Talazoparib


In [104]:
# Pembrolizumab, sold under the brand name Keytruda, is a humanized antibody used in cancer immunotherapy that treats melanoma, lung cancer, head and neck cancer, Hodgkin lymphoma, and stomach cancer.
# Let's explore regimens within NCIt for Pembro. (AKA Keytruda)
# --> Many platinum based chemo regimens 

node = name_to_id['Pembrolizumab']
for parent, child, key in graph.in_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟵  {key} ⟵ {id_to_name[parent]}')

• Pembrolizumab ⟵  NCIT:R123 ⟵ Carboplatin/Pembrolizumab/Pemetrexed Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Carboplatin/Paclitaxel/Pembrolizumab Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Cisplatin/Pembrolizumab/Pemetrexed Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Pembrolizumab/Pemetrexed Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Cisplatin/Nab-Paclitaxel/Pembrolizumab Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Carboplatin/Nab-Paclitaxel/Pembrolizumab Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Cisplatin/Paclitaxel/Pembrolizumab Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Pembrolizumab Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Carboplatin/Fluorouracil/Pembrolizumab Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Cisplatin/Fluorouracil/Pembrolizumab Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Axitinib/Pembrolizumab Regimen
• Pembrolizumab ⟵  NCIT:R123 ⟵ Avelumab/Axitinib Regimen


In [107]:
# What drug classification via NCI Thesaurus does Pembrolizumab exhibit?
# Anti-PD1 Monoclonal Antibody!
node = name_to_id['Pembrolizumab']
for parent, child, key in graph.out_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟵  {key} ⟵ {id_to_name[parent]}')

• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Pembrolizumab


In [108]:
# What other drugs are also Anti-PD1 Monoclonal Antibody via NCI Thesaurus?
# --> Similar to Parp Inhibitors, there are many non-FDA approved (investigational) Anti-PD1 Monoclonal Antibody drugs being developed
# ---> I.e. Anti-PD-1 Monoclonal Antibody MEDI0680, Anti-PD-1/LAG-3 Bispecific Antibody EMB-02 etc
node = name_to_id['Anti-PD1 Monoclonal Antibody']
for parent, child, key in graph.in_edges(node, keys=True):
    print(f'• {id_to_name[child]} ⟵  {key} ⟵ {id_to_name[parent]}')

• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Pembrolizumab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Anti-PD-1 Monoclonal Antibody MEDI0680
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Cemiplimab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Spartalizumab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Tislelizumab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Camrelizumab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Dostarlimab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Cetrelimab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Toripalimab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Ezabenlimab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Sintilimab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Balstilimab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Retifanlimab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Anti-PD-1 Monoclonal Antibody Sym021
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Prolgolimab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Geptanolimab
• Anti-PD1 Monoclonal Antibody ⟵  is_a ⟵ Anti-PD-1 Monoclonal Antibody LZM009
• Anti-PD1 Monocl

In [112]:
sorted(id_to_name[subterm] for subterm in nx.ancestors(graph, 'NCIT:C62554'))

['Amelparib',
 'Atamparib',
 'Bevacizumab/Olaparib Regimen',
 'Fermented Wheat Germ Extract',
 'Fluzoparib',
 'Hypoxanthine',
 'INO-1001',
 'Iniparib',
 'Niraparib',
 'Niraparib Regimen',
 'Niraparib Tosylate Monohydrate',
 'Olaparib',
 'Olaparib Regimen',
 'PARP 1/2 Inhibitor NOV1401',
 'PARP 1/2 Inhibitor SC10914',
 'PARP Inhibitor AZD2461',
 'PARP Inhibitor AZD5305',
 'PARP Inhibitor CEP-9722',
 'PARP Inhibitor E7016',
 'PARP Inhibitor NMS-03305293',
 'PARP-1/2 Inhibitor ABT-767',
 'PARP/Microtubule Polymerization Inhibitor AMXI-5001',
 'PARP/Tankyrase Inhibitor 2X-121',
 'Pamiparib',
 'Rucaparib',
 'Rucaparib Camsylate',
 'Rucaparib Phosphate',
 'Rucaparib Regimen',
 'Senaparib',
 'Simmiparib',
 'Talazoparib',
 'Talazoparib Regimen',
 'Talazoparib Tosylate',
 'Tankyrase Inhibitor STP1002',
 'Temozolomide/Veliparib Regimen',
 'Veliparib',
 'Venadaparib']

In [123]:
# 
sorted(id_to_name[superclass] for superclass in nx.descendants(graph, 'NCIT:C62554'))


['ADP Ribosylation',
 'Anatomic Structure, System, or Substance',
 'Antineoplastic Agent',
 'Base Excision Repair',
 'Biochemical Process',
 'Biological Process',
 'Body Part',
 'Cell',
 'Cell Part',
 'Cell Structure',
 'Cell Survival',
 'Cell Viability Process',
 'Cellular Process',
 'Chromatin',
 'Chromosome',
 'Chromosome Structure',
 'DNA Maintenance',
 'DNA Maintenance Process',
 'DNA Repair',
 'DNA Repair Gene',
 'DNA Repair Protein',
 'DNA Structure',
 'Drug or Chemical by Structure',
 'Drug, Food, Chemical or Biomedical Material',
 'Enzyme Inhibitor',
 'Excision Repair',
 'Gene',
 'Gene Expression',
 'Gene Expression Process',
 'Gene Feature',
 'Gene Product',
 'Intermediary Metabolic Process',
 'Macromolecular Structure',
 'Metabolic Process',
 'Microanatomic Structure',
 'Nuclear Structure',
 'Nucleic Acids',
 'Nucleic Acids, Nucleosides, and Nucleotides',
 'Nucleus',
 'Organic Chemical',
 'PARP1 Gene',
 'PARP1 Gene Product',
 'Pharmacologic Substance',
 'Poly [ADP-Ribose] Po

In [124]:
paths = nx.all_simple_paths(
    graph,
    target=name_to_id['Pharmacologic Substance'],
    source=name_to_id['Anti-PD-1/LAG-3 Bispecific Antibody EMB-02']
)
for path in paths:
    print('•', ' ⟶ ' '\n' ' ⟶ '.join(id_to_name[node] for node in path))


• Anti-PD-1/LAG-3 Bispecific Antibody EMB-02 ⟶ 
 ⟶ Anti-PD1 Monoclonal Antibody ⟶ 
 ⟶ PD1 Inhibitor ⟶ 
 ⟶ Immune Checkpoint Inhibitor ⟶ 
 ⟶ Immune Checkpoint Modulator ⟶ 
 ⟶ Immunotherapeutic Agent ⟶ 
 ⟶ Pharmacologic Substance
• Anti-PD-1/LAG-3 Bispecific Antibody EMB-02 ⟶ 
 ⟶ Anti-PD1 Monoclonal Antibody ⟶ 
 ⟶ PD1 Inhibitor ⟶ 
 ⟶ Targeted Therapy Agent ⟶ 
 ⟶ Antineoplastic Agent ⟶ 
 ⟶ Pharmacologic Substance
• Anti-PD-1/LAG-3 Bispecific Antibody EMB-02 ⟶ 
 ⟶ Anti-PD1 Monoclonal Antibody ⟶ 
 ⟶ Monoclonal Antibody ⟶ 
 ⟶ Biological Agent ⟶ 
 ⟶ Pharmacologic Substance
• Anti-PD-1/LAG-3 Bispecific Antibody EMB-02 ⟶ 
 ⟶ Anti-PD1 Monoclonal Antibody ⟶ 
 ⟶ Monoclonal Antibody ⟶ 
 ⟶ Immunotherapeutic Agent ⟶ 
 ⟶ Pharmacologic Substance
• Anti-PD-1/LAG-3 Bispecific Antibody EMB-02 ⟶ 
 ⟶ Anti-LAG-3 Monoclonal Antibody ⟶ 
 ⟶ Antineoplastic Antibody ⟶ 
 ⟶ Antineoplastic Biological Agent ⟶ 
 ⟶ Antineoplastic Agent ⟶ 
 ⟶ Pharmacologic Substance
• Anti-PD-1/LAG-3 Bispecific Antibody EMB-02 ⟶ 
 ⟶ Ant