In [1]:
# Append ../TCT to sys.path (must happen before `import TCT` below) so the TCT module can be imported
import sys
sys.path.append('../TCT')
import TCT as TCT
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd


In [2]:
# Step 1: List all the APIs in the Translator system.
# The call returns two objects: the KP info table and APInames (extended and reused by later cells).
(Translator_KP_info, APInames) = TCT.get_SmartAPI_Translator_KP_info()

# Keep a CSV snapshot of the KP listing next to the notebook.
Translator_KP_info.to_csv(
    'SmartAPI_Translator_KP_info.csv',
    index=False,
)

# Number of entries in the KP info table.
print(len(Translator_KP_info))

52


In [3]:
# Step 2: Get metaKG from Translator APIs
# Input: APInames as produced by Step 1. The resulting metaKG is the object
# that later cells extend with user-defined APIs and pass to the select_* helpers.
metaKG = TCT.get_KP_metadata(APInames) # This only applies to the Translator APIs

In [None]:
# Add new APIs to the list.
# Entries currently disabled (kept for reference):
#APInames['ctkp'] = 'https://multiomics.rtx.ai:9990/ctkp/meta_knowledge_graph'
#APInames['mokp'] = 'https://multiomics.rtx.ai:9990/mokp'
#APInames['dakp'] = 'https://multiomics.rtx.ai:9990/dakp'
#APInames['mbkp'] = 'https://multiomics.rtx.ai:9990/mbkp'

# Active entries: (API name, URL) pairs written into APInames in this order.
for extra_api_name, extra_api_url in (
    ('CATRAX-PharmacogenomicsKG', 'https://multiomics.rtx.ai:9990/PharmacogenomicsKG'),
    ('BigGIM', "https://multiomics.rtx.ai:9990/BigGIM_DrugResponse_PerformancePhase"),
):
    APInames[extra_api_name] = extra_api_url

In [None]:
# Step 3 (optional): add new metaKG entries from user-defined APIs
# `requests` is imported before its first use so this cell also runs on a fresh kernel
# (previously the import sat below the first requests.get call -> NameError).
import requests


def _add_kp_edges(APInames, metaKG, api_name, base_url):
    """Register every edge advertised by one user-defined KP.

    Downloads ``<base_url>/meta_knowledge_graph`` and, for each entry of its
    ``"edges"`` list, calls ``TCT.add_new_API_for_query`` with ``api_name``,
    the KP's ``<base_url>/query`` endpoint and the edge's predicate, subject
    and object.

    Parameters
    ----------
    APInames, metaKG
        The current API-name collection and metaKG; both are passed to and
        replaced by the return value of each ``add_new_API_for_query`` call.
    api_name : str
        Name under which the KP is registered (e.g. ``"BigGIM"``).
    base_url : str
        KP base URL, without a trailing slash.

    Returns
    -------
    tuple
        ``(APInames, metaKG)`` after the last edge was added; the inputs are
        returned unchanged when the KP reports no edges.

    Raises
    ------
    requests.HTTPError
        If the meta_knowledge_graph request answers with a 4xx/5xx status.
    """
    response = requests.get(base_url + '/meta_knowledge_graph')
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError further down.
    response.raise_for_status()
    query_url = base_url + '/query'
    for edge in response.json()["edges"]:
        APInames, metaKG = TCT.add_new_API_for_query(
            APInames, metaKG, api_name, query_url,
            edge['predicate'], edge['subject'], edge['object'])
    return APInames, metaKG


APInames, metaKG = _add_kp_edges(
    APInames, metaKG, "BigGIM",
    'https://multiomics.rtx.ai:9990/BigGIM_DrugResponse_PerformancePhase')

APInames, metaKG = _add_kp_edges(
    APInames, metaKG, "PharmacogenomicsKG",
    'https://multiomics.rtx.ai:9990/PharmacogenomicsKG')

# Step 4: Save the metaKG to a file
# NOTE(review): this snapshot is written before CTKP and DAKP are registered below,
# so the CSV does not contain their edges -- confirm that this is intended.
metaKG.to_csv('../metaData/metaKG.csv', index=False)

APInames, metaKG = _add_kp_edges(
    APInames, metaKG, "CTKP",
    'https://multiomics.rtx.ai:9990/ctkp')

APInames, metaKG = _add_kp_edges(
    APInames, metaKG, "DAKP",
    'https://multiomics.rtx.ai:9990/dakp')


In [None]:
# Write only the metaKG rows whose 'API' column equals "PharmacogenomicsKG" to their own CSV.
is_pharmacogenomics_row = metaKG['API'] == "PharmacogenomicsKG"
metaKG.loc[is_pharmacogenomics_row].to_csv(
    '../metaData/PharmacogenomicsKG_meta.csv',
    index=False,
)

In [None]:
# Step 3: set input parameters
# Test multiomics BigGIM Drug Response KP

# ---- Node 1 of the query ----
input_node1 = 'B-cell'
input_node1_id = TCT.get_curie(input_node1)
print(input_node1_id)
# The CURIE looked up above is only printed; the query itself uses the fixed ID below.
input_node1_id = 'NCBIGene:2215'
input_node1_list = [input_node1_id]
# Note: categories have to be given in the format biolink:xxx
input_node1_category = ['biolink:Gene', 'biolink:Protein']

# ---- Node 2 of the query ----
# No identifiers are given for node 2; only its category is constrained.
input_node2_list = []
input_node2_category = ['biolink:Cell']  # Note: same biolink:xxx format as above
#input_node2_category = ['biolink:Gene']

# The same subject/object/metaKG arguments drive both the predicate and the API selection.
category_selection = dict(
    sub_list=input_node1_category,
    obj_list=input_node2_category,
    metaKG=metaKG,
)

# Collect all predicates for the node1/node2 categories (de-duplicated through a set);
# the user can further select predicates from this list.
sele_predicates = list(set(TCT.select_concept(**category_selection)))

print("all relevant predicates in Translator:")
print(sele_predicates)

# Collect all APIs for the node1/node2 categories;
# the user can further select APIs from this list.
sele_APIs = TCT.select_API(**category_selection)

print("all relevant APIs in Translator:")
print(sele_APIs)
print(len(sele_APIs))

# Resolve the selected APIs to their URLs via APInames.
API_URLs = TCT.get_Translator_API_URL(sele_APIs, APInames)

In [None]:
# Step 4: Format query json
# Positional arguments, in order:
#   1. identifiers of input node1 (list)
#   2. identifiers of input node2 (list; may be empty to query on node1 only)
#   3. categories of input node1 (list)
#   4. categories of input node2 (list)
#   5. predicates (list)
query_json = TCT.format_query_json(
    input_node1_list,
    input_node2_list,
    input_node1_category,
    input_node2_category,
    sele_predicates,
)
# Leave the query as the cell's last expression so the notebook displays it.
query_json

In [None]:

# Step 5: Query the selected Translator APIs in parallel.
# max_workers is set to the number of API URLs.
result = TCT.parallel_api_query(
    API_URLs,
    query_json=query_json,
    max_workers=len(API_URLs),
)

# Step 6: Parse the raw API results.
result_parsed = TCT.parse_KG(result)

# Step 7: Rank the parsed results.
# This ranking method is based on the number of unique primary infores and can
# only be used to rank results with one defined node. input_node1_id is the
# CURIE of that input node, such as "NCBIGene:1017".
result_ranked_by_primary_infores = TCT.rank_by_primary_infores(
    result_parsed,
    input_node1_id,
)


In [None]:
# Quick look at the raw query result: how many keys it has, and the value stored under the first key.
result_keys = list(result.keys())
print(len(result_keys))
result[result_keys[0]]

In [None]:
# Step 8: Visualize the results
# Uses the ranking from Step 7 and the parsed results from Step 6, with input_node1_id as the query label input.
TCT.visulization_one_hop_ranking(
    result_ranked_by_primary_infores,
    result_parsed,
    num_of_nodes=50,
    input_query=input_node1_id,
    fontsize=5,
)