In [1]:
# Author: Gergely Zahoranszky-Kohalmi, PhD
#
# Organization: National Center for Advancing Translational Sciences (NCATS/NIH)
#
# Email: gergely.zahoranszky-kohalmi@nih.gov


In [2]:
# Timer start: take a high-resolution reference reading. The "Timer Stop"
# sections further down subtract it from a second perf_counter() reading
# to report the elapsed workflow time.
import time
start_time = time.perf_counter()

In [3]:
import requests
import json
import pandas as pd
import networkx as nx
from sg import *
import time



In [4]:
# Config Section
#
# Tunable parameters of the workflow. Later cells read these module-level names.

# UniProt accessions of the two proteins the workflow is built around.
UNIPROT_PTGS2 = 'P35354'    # COX-2 (PTGS2): target for which compounds are predicted
UNIPROT_CASP8 = 'Q14790'    # CASP-8: protein whose regulators are explored

PPI_DISTANCE = 4                    # sent as "max_length" of the regulatory-path query
SHORTEST_PATH = True                # sent as "shortest_paths" of the regulatory-path query
ACTIVITY_CUTOFF_uM = 1              # activity cutoff (uM) used when fetching potent compounds
LIMIT_PREDICTED_COMPOUNDS = 1000    # sent as "limit" of the prediction query

PERTURBATION_MODE = 'target'    # undirected: upstream and downstream | source: downstream only | target: upstream only
CONFIDENCE_CUTOFF = 0           # sent as "confidence_cutoff" of the regulatory-path query


# Bounds on the overlap ratio of predicted compounds
# (cf. the min_ratio / max_ratio arguments of filter_predicted_compounds).
MIN_OVERLAP = 0.75
MAX_OVERLAP = -1    # -1: not enforced, no max ratio imposed



In [5]:
# Workflow Outline



# 1. Predict compounds for COX-2 (PTGS2), UniProt ID: P35354

# 2. Take into account only the predicted compounds that overlap with the respective potent pattern by at least the 80th
#    percentile overlap ratio value

# 3. Get upstream regulators of CASP-8 up to the configured distance (PPI_DISTANCE in the Config Section)

# 4. See if any of the predicted drugs for COX-2 is among the known potent molecules of any of the
#    regulators (including CASP-8 modulators)



In [6]:
def get_predictions (target, limit):
    """Query the SmartGraph `predict` endpoint for a target protein.

    Parameters
    ----------
    target : str
        UniProt ID of the target protein; it is appended to the endpoint URL.
    limit : int
        Value sent as the `limit` query parameter (presumably caps the number
        of predicted compounds returned -- confirm against the API docs).

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    Exception
        If the HTTP call or the JSON decoding fails. The underlying error is
        chained as `__cause__` so the root cause stays visible in the traceback.
    """
    url_sg = 'https://smartgraph.scb-ncats.io/api/predict/' + target

    # Query-string parameters. They must be handed to `requests` as a mapping:
    # a pre-serialised JSON string would be appended verbatim as the query
    # string, so the server would never see `limit` / `format` as parameters.
    cargo = {'limit': limit,
             'format': 'json'}

    headers = {'Content-Type': 'application/json', 'accept': 'application/json'}

    print ()
    print ("[->] SmartGraph pathway analysis started ..")
    print ()

    try:
        api_response = requests.get(url = url_sg, params = cargo, headers = headers)
        result = api_response.json()
    except Exception as err:
        # `except Exception` (rather than a bare `except`) lets KeyboardInterrupt
        # and SystemExit propagate, e.g. when the user interrupts the kernel.
        raise Exception ("[ERROR] Something went wrong when calling SmartGraph endpoint.") from err

    print ("[*] .. SmartGraph pathway analysis done.")
    print()
    print()

    return result


def assess_perturbation(uniprot_id, shortest_paths_only, max_hops, mode, min_confidence):
    """Query the SmartGraph `path_regulatory_open` endpoint for one protein.

    Parameters
    ----------
    uniprot_id : str
        UniProt ID of the protein; sent as the single entry of "uniprot_ids".
    shortest_paths_only : bool
        Sent as "shortest_paths".
    max_hops : int
        Sent as "max_length".
    mode : str
        Sent as "explore_mode". The notebook's config section lists the values
        'undirected', 'source' and 'target'.
    min_confidence : number
        Sent as "confidence_cutoff".

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    Exception
        If building the payload, the HTTP call or the JSON decoding fails. The
        underlying error is chained as `__cause__`.
    """
    url_sg = 'https://smartgraph.scb-ncats.io/api/path_regulatory_open'

    headers = {'Content-Type': 'application/json', 'accept': 'application/json'}

    cargo = {
          "uniprot_ids": [
            uniprot_id
          ],
          "shortest_paths": shortest_paths_only,
          "max_length": max_hops,
          "explore_mode": mode,
          "confidence_cutoff": min_confidence
        }

    print ()
    print ("[->] SmartGraph pathway analysis started ..")
    print ()

    try:
        api_response = requests.post(url = url_sg, data = json.dumps(cargo), headers = headers)
        result = api_response.json()
    except Exception as err:
        # `except Exception` (rather than a bare `except`) lets KeyboardInterrupt
        # and SystemExit propagate, e.g. when the user interrupts the kernel.
        raise Exception ("[ERROR] Something went wrong when calling SmartGraph endpoint.") from err

    print ("[*] .. SmartGraph pathway analysis done.")
    print()
    print()

    return result



def get_potent_compounds (uniprot_id, cutoff_in_uM = 10):
    """Query the SmartGraph `bioactivity_target` endpoint for one target.

    Parameters
    ----------
    uniprot_id : str
        UniProt ID of the target; sent as the single entry of
        "target_uniprot_ids".
    cutoff_in_uM : number, default 10
        Sent as "activity_cutoff". Per the original author's note, only
        activity values at or below this cutoff (in uM, i.e. more potent)
        are reported.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    Exception
        If building the payload, the HTTP call or the JSON decoding fails. The
        underlying error is chained as `__cause__`.
    """
    url_sg = 'https://smartgraph.scb-ncats.io/api/bioactivity_target'

    headers = {'Content-Type': 'application/json', 'accept': 'application/json'}

    cargo = {
              "target_uniprot_ids": [
                uniprot_id
              ],
              "activity_cutoff": cutoff_in_uM,
              "activity_type": "activity"
            }

    print ()
    print ("[->] SmartGraph potent compounds analysis started ..")
    print ()

    try:
        api_response = requests.post(url = url_sg, data = json.dumps(cargo), headers = headers)
        result = api_response.json()
    except Exception as err:
        # `except Exception` (rather than a bare `except`) lets KeyboardInterrupt
        # and SystemExit propagate, e.g. when the user interrupts the kernel.
        raise Exception ("[ERROR] Something went wrong when calling SmartGraph endpoint.") from err

    print ("[*] .. SmartGraph potent compounds analysis done.")
    print()
    print()

    return result


def filter_predicted_compounds (G, min_ratio = 0, max_ratio = -1):
    """Collect the end nodes of 'pattern_of' edges whose ratio is within bounds.

    Parameters
    ----------
    G : graph
        Object whose `edges(data=True)` yields (start, end, attributes) triples.
        Every edge needs an 'edge_type' attribute; 'pattern_of' edges also need
        a 'ratio' attribute.
    min_ratio : number, default 0
        Inclusive lower bound on the edge's 'ratio'.
    max_ratio : number, default -1
        Inclusive upper bound on the edge's 'ratio'; -1 disables the upper bound.

    Returns
    -------
    set
        End nodes of all qualifying edges.
    """
    upper_bound_active = (max_ratio != -1)
    selected = set()

    for _src, dst, attrs in G.edges(data = True):
        # Only pattern -> compound overlap edges are of interest.
        if attrs['edge_type'] != 'pattern_of':
            continue

        overlap = attrs['ratio']

        if not (overlap >= min_ratio):
            continue

        if upper_bound_active and not (overlap <= max_ratio):
            continue

        selected.add(dst)

    return selected
                

def create_prediction_subgraph (G, filtered_compounds):
    """Build a graph linking every selected compound to every target of G.

    The result contains the nodes in `filtered_compounds` plus all nodes of G
    whose 'node_type' is 'target'. An edge is added from each 'compound' node
    to each 'target' node, and every edge of the result is then labelled with
    'start_node', 'end_node' and edge_type 'predicted_dti'.

    Parameters
    ----------
    G : graph
        networkx-style graph; every node needs a 'node_type' attribute.
    filtered_compounds : iterable
        Node identifiers of the compounds to keep (e.g. a set). It is only
        read, never modified.

    Returns
    -------
    A new graph, independent of G.
    """
    # Copy the selection first: adding the target nodes to the caller's own set
    # would silently change `filtered_compounds` outside this function.
    keep = set(filtered_compounds)
    keep.update(n for n, n_attr in G.nodes(data = True) if n_attr['node_type'] == 'target')

    # subgraph() returns a view of G; copy() detaches it so that the edits
    # below never touch G.
    R = G.subgraph(keep).copy()

    targets = []
    compounds = []

    for n, n_attr in R.nodes(data = True):
        node_type = n_attr['node_type']

        if node_type == 'target':
            targets.append(n)

        elif node_type == 'compound':
            compounds.append(n)

    for c in compounds:
        for t in targets:
            R.add_edge (c, t)

    # Label every edge of the result (pre-existing ones included) as a
    # predicted drug-target interaction.
    for start_node, end_node in list(R.edges()):
        edge_attr = R[start_node][end_node]
        edge_attr['start_node'] = start_node
        edge_attr['end_node'] = end_node
        edge_attr['edge_type'] = 'predicted_dti'

    return R


def get_target_nodes (G):
    """Return the nodes of G whose 'node_type' attribute equals 'target'.

    Every node of G needs a 'node_type' attribute. The result is a list
    without duplicates; its order is that of a set and therefore arbitrary.
    """
    unique_targets = {
        node
        for node, attrs in G.nodes(data = True)
        if attrs['node_type'] == 'target'
    }

    return list(unique_targets)

In [7]:
# Step 1


# 1. Predict compounds for COX-2 (PTGS2), UniProt ID: P35354


# Query the prediction endpoint for the configured target and compound limit.
sg_json = get_predictions (UNIPROT_PTGS2, LIMIT_PREDICTED_COMPOUNDS)

# sg_json2graph / graph2cyjs / visualize_in_cytoscape are not defined in this
# notebook (presumably provided by `from sg import *`). G is reused in Step 2.
G = sg_json2graph (sg_json)

cy_json = graph2cyjs (G)

visualize_in_cytoscape (cy_json)





[->] SmartGraph pathway analysis started ..

[*] .. SmartGraph pathway analysis done.


DiGraph with 318 nodes and 317 edges
Network created with SUID: 728941
Network view created.
Existing styles: ['Curved', 'Sample1', 'BioPAX_SIF_0', 'Marquee', 'Big Labels', 'default black', 'size_rank', 'Sample3', 'default', 'Gradient1', 'SynGPS', 'Ripple', 'BioPAX_0', 'SynGPS API', 'BioPAX', 'Nested Network Style', 'Solid', 'Universe', 'Directed', 'Minimal', 'Sample2', 'BioPAX_SIF']
Creating new style 'SmartGraph API'.
New style 'SmartGraph API' created.
Style 'SmartGraph API' applied to the network.
Layout 'grid' applied to the network.


In [8]:
# Step 2.

# 2. Take into account only the predicted compounds whose overlap ratio with the
#    respective potent pattern lies within the bounds set in the Config Section
#    (MIN_OVERLAP / MAX_OVERLAP). The original note describes the lower bound
#    as the 80th percentile overlap ratio value.

filtered_compounds = filter_predicted_compounds (G, min_ratio = MIN_OVERLAP, max_ratio = MAX_OVERLAP)

print (filtered_compounds)



P = create_prediction_subgraph (G, filtered_compounds)

cy_json = graph2cyjs (P)

visualize_in_cytoscape (cy_json)




# Preparation for 4. (see if any of the predicted drugs for COX-2 is among the
# known potent molecules of any of the regulators, including CASP-8 modulators):
# S starts as a copy of the prediction subgraph; the following cell composes
# further graphs into it.



S = P.copy()







{'WDGYCEHMFMSJEW-UHFFFAOYSA-N', 'NRRDZUADGNIUCD-UHFFFAOYSA-N', 'OXGUCUVFOIWWQJ-WLTRVTDYSA-N', 'KXFAISXPYYJVCJ-GRSHGNNSSA-N', 'PZZRDJXEMZMZFD-IEGSVRCHSA-N', 'JPUKWEQWGBDDQB-QSOFNFLRSA-N', 'DGJYBDOWQFCDHE-UHFFFAOYSA-N', 'PZZRDJXEMZMZFD-SQIUQIIDSA-N', 'GXDOWNQTKLTXEW-UHFFFAOYSA-N', 'XJMXHTQZLWXEDW-UHFFFAOYSA-N', 'INHCQGHDHUOQIX-UHFFFAOYSA-N', 'RUAGOKUSUUWRQE-UHFFFAOYSA-N', 'FUFKFSVXLFTGSP-UHFFFAOYSA-N', 'RNVUDWOQYYWXBJ-IEGSVRCHSA-N', 'WSIZFTULXCTIHZ-UHFFFAOYSA-N', 'SJKDLOJANSYBDC-UHFFFAOYSA-N', 'MYPBQYHRXMVYIY-UHFFFAOYSA-N', 'UMUBKDCPBROUFT-UHFFFAOYSA-N', 'QDGLVKCJNVHFCT-NDENLUEZSA-N', 'OHCLXXFQIPGZRS-UHFFFAOYSA-N', 'YKEOPYUONVHGAZ-JPJNWVEQSA-N', 'PZZRDJXEMZMZFD-ZMQFRBSTSA-N', 'DNITTWSXYHDGKU-UHFFFAOYSA-N', 'TWKARRBOKOIBHF-UHFFFAOYSA-N', 'KYQBXJFBRGTRJO-UHFFFAOYSA-N', 'SQOFVHPBOAQZCT-UHFFFAOYSA-N', 'RBZAXFZNNVDTJO-UHFFFAOYSA-N', 'LCKGEDRQDVZBSX-UHFFFAOYSA-N', 'ZGCKMTQNGWQKEN-VQHVLOKHSA-N', 'NUFNGCDVYZKSKE-HOHGHBTGSA-N', 'FEISFUIEFYIRAS-DHZHZOJOSA-N', 'KWYARAWBLOGQHJ-UHFFFAOYSA-N', 'PZZRDJ

In [9]:
# Step 3.

# 3. Get regulators of CASP-8 according to the Config Section
#    (PPI_DISTANCE, SHORTEST_PATH, PERTURBATION_MODE, CONFIDENCE_CUTOFF)

# Regulatory path, open ended (start or end node).

# Target: Q14790 (Casp-8)

# Example of an equivalent request (illustrative values: undirected, max step 2):
# curl -X 'GET' \
#   'https://smartgraph.scb-ncats.io/api/path_regulatory_open/Q14790?shortest_paths=true&max_length=2&explore_mode=undirected&confidence_cutoff=0&format=json' \
#   -H 'accept: application/json'

sg_json = assess_perturbation (UNIPROT_CASP8, SHORTEST_PATH, PPI_DISTANCE, PERTURBATION_MODE, CONFIDENCE_CUTOFF)


#print (sg_json)

print ('orig PPI graph')
H = sg_json2graph (sg_json)

#print (H)
# Show the regulatory (PPI) graph on its own before it is merged into S.
cy_json_H = graph2cyjs (H)
visualize_in_cytoscape (cy_json_H)


# Iterate through all *upstream* targets, get target-compound pairs




# Disabled alternative: restrict H to the ancestors of CASP-8 before merging.
# this will be a Set:
#ancestors = nx.ancestors(H, UNIPROT_CASP8)

#print (ancestors)
#ancestors_l = list(ancestors)

#print (ancestors_l)


# Adding study subject node back to nodes to create the upstream-target induced subgraph
#ancestors.add(UNIPROT_CASP8)


#A = H.subgraph(ancestors)


#print ('ancestor induced subgraph')
#print (A)




# Merge the full regulatory graph H into the combined graph S.
S = nx.compose(S, H)

#cy_json = graph2cyjs (A)

#visualize_in_cytoscape (cy_json)





# Get potent compounds of study_subject
# unit returned by SmartGraph API is in uM

dti_casp8_json = get_potent_compounds (UNIPROT_CASP8, cutoff_in_uM = ACTIVITY_CUTOFF_uM)

C_0 = sg_json2graph (dti_casp8_json)



#S = nx.compose(S, A)

# Merge the CASP-8 compound graph into S and show the intermediate result.
S = nx.compose(S, C_0)

cy_json = graph2cyjs (S)

visualize_in_cytoscape (cy_json)






# Get potent compounds of upstream neighbors

# Maps target node -> graph of that target's potent compounds.
networks = {}


targets = get_target_nodes (H)

nr_targets = len(targets)

for idx, target in enumerate(targets, start = 1):

    print (f'[*] Processing target nr. {idx} of total {nr_targets} targets.')

    # CASP-8 and PTGS2 are not queried here. CASP-8's potent compounds were
    # already merged into S by the preceding statements of this cell.
    if target in (UNIPROT_CASP8, UNIPROT_PTGS2):
        continue

    potent_compounds = get_potent_compounds (target, cutoff_in_uM = ACTIVITY_CUTOFF_uM)
    networks[target] = sg_json2graph (potent_compounds)


# `networks` only holds the neighbors selected above, so every entry is merged.
for A in networks.values():
    S = nx.compose(S, A)

cy_json = graph2cyjs (S)


# Timer Stop
end_time = time.perf_counter()
duration = end_time - start_time
print(f'[*] Workflow duration (before last visualization step): {duration:.1f} sec')

visualize_in_cytoscape (cy_json)





[->] SmartGraph pathway analysis started ..

[*] .. SmartGraph pathway analysis done.


orig PPI graph
DiGraph with 766 nodes and 781 edges
Network created with SUID: 733119
Network view created.
Existing styles: ['Sample1', 'Marquee', 'Sample3', 'SynGPS', 'BioPAX_0', 'Nested Network Style', 'Solid', 'Universe', 'SmartGraph API', 'Sample2', 'BioPAX_SIF', 'SmartGraph API_0', 'Curved', 'BioPAX_SIF_0', 'Big Labels', 'default black', 'size_rank', 'default', 'Gradient1', 'Ripple', 'SynGPS API', 'BioPAX', 'Directed', 'Minimal']
Creating new style 'SmartGraph API'.
New style 'SmartGraph API' created.
Style 'SmartGraph API' applied to the network.
Layout 'grid' applied to the network.

[->] SmartGraph potent compounds analysis started ..

[*] .. SmartGraph potent compounds analysis done.


DiGraph with 240 nodes and 239 edges
Network created with SUID: 741031
Network view created.
Existing styles: ['Sample1', 'Marquee', 'Sample3', 'SynGPS', 'BioPAX_0', 'Nested Network Style', 'Solid', 'Univer

In [10]:
# Timer Stop: elapsed time since `start_time` was recorded in the
# timer-start cell at the top of the notebook.
end_time = time.perf_counter()
duration = end_time - start_time
print(f'[*] Workflow duration: {duration:.1f} sec')



[*] Workflow duration: 286.2 sec


In [11]:
# Ref: https://www.freecodecamp.org/news/loading-a-json-file-in-python-how-to-read-and-parse-json/
# Ref: https://networkx.org/documentation/stable/reference/readwrite/generated/networkx.readwrite.json_graph.cytoscape_data.html#networkx.readwrite.json_graph.cytoscape_data
# Ref: https://networkx.org/documentation/stable/reference/readwrite/generated/networkx.readwrite.json_graph.cytoscape_graph.html
# Ref: https://groups.google.com/g/cytoscape-helpdesk/c/keumGM-bwz0
# Ref: https://networkx.org/documentation/networkx-1.9/reference/generated/networkx.readwrite.graphml.write_graphml.html
# Ref: https://github.com/cytoscape/copycat-layout/blob/master/notebooks/Copycat%20Automation%20Example.ipynb
# Ref: ChatGPT 4.0 [https://chat.openai.com]
# Ref: https://www.geeksforgeeks.org/python-map-function/
# Ref: https://stackoverflow.com/questions/24898797/check-if-key-exists-and-iterate-the-json-array-using-python
# Ref: https://htmlcolorcodes.com/colors/brick-red/
# Ref: https://stackoverflow.com/questions/32652149/combine-join-networkx-graphs
# Ref: https://networkx.org/documentation/stable/reference/classes/generated/networkx.Graph.copy.html
# Ref: https://www.w3schools.com/python/ref_requests_get.asp
# Ref: https://builtin.com/articles/timing-functions-python


