In [129]:
#!/usr/bin/env python3

# Import relevant libraries to make HTTP requests and parse JSON response
import requests
import json
import csv
import io
import pandas as pd

# Set disease_id variable for desired disease
disease_id = "MONDO_0001657"

# Build query string to get target information as well as count
query_string = """
query AssociatedTargets {
  disease(efoId: "MONDO_0001657") {
    id
    name
    associatedTargets(page: { size: 3, index: 0 }) {
      rows {
        target {
          id
          approvedName
          approvedSymbol
        }
        score
      }
    }
  }
}
"""

# Set variables object of arguments to be passed to endpoint
variables = {"efoId": disease_id}

# Set base URL of GraphQL API endpoint
base_url = "https://api.platform.opentargets.org/api/v4/graphql"

# Perform POST request and check status code of response
r = requests.post(base_url, json={"query": query_string, "variables": variables})
print(r.status_code)

#Transform API response from JSON into Python dictionary and print in console
api_response = json.loads(r.text)
print('done')
print(api_response)

200
done
{'data': {'disease': {'id': 'MONDO_0001657', 'name': 'brain cancer', 'associatedTargets': {'rows': [{'target': {'id': 'ENSG00000171862', 'approvedName': 'phosphatase and tensin homolog', 'approvedSymbol': 'PTEN'}, 'score': 0.620857438316738}, {'target': {'id': 'ENSG00000141510', 'approvedName': 'tumor protein p53', 'approvedSymbol': 'TP53'}, 'score': 0.6206275457228118}, {'target': {'id': 'ENSG00000115170', 'approvedName': 'activin A receptor type 1', 'approvedSymbol': 'ACVR1'}, 'score': 0.6077789188282776}]}}}}


In [138]:
a=0
#create list of target IDs with associated evidence scores by calling them from dictionary
target_info = api_response['data']['disease']['associatedTargets']['rows']
#print(target_info)

target_id_list = []
#pull IDs and scores from dictionary and add to new list of tuples
while a < len(target_info):
    target_id_list.append((target_info[a]['target'].get('id'), target_info[a]['score']))
    a+=1
print(target_id_list)


[{'target': {'id': 'ENSG00000171862', 'approvedName': 'phosphatase and tensin homolog', 'approvedSymbol': 'PTEN'}, 'score': 0.620857438316738}, {'target': {'id': 'ENSG00000141510', 'approvedName': 'tumor protein p53', 'approvedSymbol': 'TP53'}, 'score': 0.6206275457228118}, {'target': {'id': 'ENSG00000115170', 'approvedName': 'activin A receptor type 1', 'approvedSymbol': 'ACVR1'}, 'score': 0.6077789188282776}]
[('ENSG00000171862', 0.620857438316738), ('ENSG00000141510', 0.6206275457228118), ('ENSG00000115170', 0.6077789188282776)]


In [132]:
#Create new dictionary assigning assays to to each target, and assigning compounds to each assay using PUG REST
# Able to write dictionary at a speed of 4 targets/second

a=0
target_id_assay_dict = {}
for target_id in target_id_list:
    target_id = target_id[0]
# get raw AIDs (Assay IDs) for each Ensemble ID in JSON format using PUG REST
    r = requests.get('https://pubchem.ncbi.nlm.nih.gov/rest/pug/gene/synonym/Ensemble:' + target_id + '/aids/json')
# convert JSON to Python Dictionary
    raw_aids = r.json()
    #print(type(raw_aids))
# isolate raw_aids into a list containing just the AIDs from the dictionary
    try:
        aids = raw_aids['InformationList']['Information'][0]['AID']
    except:
        pass
# assign each Ensamble Target ID to dictionary entry with associated Assay IDs
    #target_id_assay_dict[id] =  aids
        
    #create sub-dictionary for compounds within each assay
    target_id_assay_dict[target_id] =  {}
    for aid in aids:
        aid = str(aid)
        #print(aid)
        # Get raw assay data, including CIDs and Activity data, from PubChem.
        r = requests.get('https://pubchem.ncbi.nlm.nih.gov/assay/pcget.cgi?query=download&record_type=datatable&actvty=active&response_type=display&aid=' + aid)
        
        # Convert data to string
        loose_data = r.text

        
        # Get the index of the information you want, to grab from those columns later
        listed_data = loose_data.split(',')
        # Gets index of CID location, and passes to next assay if CID column is not found
        try:   
            cid_index = listed_data.index("PUBCHEM_CID")
            #print(cid_index)
            #print(type(cid_index))
        except:
            #print("no CIDs found")
            continue
        # Gets index of IC50 column (essentially looks to see of "IC50" is anywhere), and passes if IC50 is not used
        try:
            IC50_index = listed_data.index("IC50")
            #print(IC50_index)
            #print(type(IC50_index))
        except:
            #print("no IC50 found")
            continue
        # Gets index of Pubchem Standard Value (standard unit conversion from IC0, EC50, etc.) and passes to next assay if not found
        try:
            standard_score_index = listed_data.index("PubChem Standard Value")
        except:
            #print("no standard value found")
            continue
        
        fhand = io.StringIO(loose_data)

        cids_with_activities = []
        # Split each line into a list, and use found indicies to get desired data
        for line in fhand:
            line = line.split(',')
            #print(line)
            try:
                int(line[0])
                int(line[cid_index])
                #float(line[IC50_index])
                float(line[standard_score_index])
            except:
                line = ""
                continue
            cid_with_activity = (line[cid_index], line[standard_score_index])
            cids_with_activities.append(cid_with_activity)
        #isolate raw_cids into a list containing just the CIDs from the dictionary
        if len(cids_with_activities) > 0:
            #assign CIDs to associated AID dictionary entry
            target_id_assay_dict[target_id][aid] = cids_with_activities
            print("Compounds found! Adding compounds to dictionary and passing to next assay.")
        else:
            print("No compounds found for assay. Passing to next assay.")
            continue
        
    a+=1
    display(a)
print(target_id_assay_dict)

No compounds found for assay. Passing to next assay.


1

Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding c

2

Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding compounds to dictionary and passing to next assay.
Compounds found! Adding c

3

{'ENSG00000171862': {}, 'ENSG00000141510': {'241501': [('11648438', '5.3')], '241830': [('656933', '0.42'), ('44390543', '2.2'), ('44390614', '0.67'), ('44390620', '0.7'), ('44390633', '3.3'), ('44390636', '0.62'), ('44390637', '1.5'), ('44390648', '2.5'), ('44390650', '1.02'), ('44390656', '0.87'), ('44390658', '2.27'), ('44390660', '1.32'), ('44390661', '1.59'), ('44390666', '0.083'), ('44390667', '1.47'), ('44390668', '0.83'), ('44390681', '2.7'), ('44390682', '7.4'), ('44390684', '1.55'), ('44390685', '0.83'), ('44390698', '2.27'), ('44390699', '6.3'), ('44390720', '7.5'), ('44390724', '2.4'), ('44390731', '0.62'), ('44390877', '7.9'), ('44390887', '0.49')], '271647': [('11648438', '5.3')], '438428': [('45483702', '5.84'), ('45483709', '1.72'), ('45483710', '0.3'), ('45483726', '4.63'), ('45483727', '2.08'), ('45483728', '0.91'), ('45483731', '1.73'), ('45483738', '1.4'), ('45483723', '0.44'), ('45483751', '1.21'), ('45483752', '3.2'), ('45483757', '3.06'), ('45483766', '0.99'), ('

In [111]:
theDict = target_id_assay_dict

In [112]:
# Curate new dictionary by removing compounds that have been used in multiple assays
# DO NOT USE YET. Currently code grabs duplicates regardless of target, need to specify only taking duplicates on target level, not disease level

uniqueList = []
repeatList = []
final_unique = []
final_repeat = []

RemovedDuplicateDict = theDict

for target in theDict:
    a=0
    print("checking target " + target + "...")
    for assay in theDict[target]:
        for compound in theDict[target][assay]:
            if compound[0] in uniqueList:
                RemovedDuplicateDict[target][assay].remove(compound)
                if compound[0] not in repeatList:
                    repeatList.append(compound[0])
                    #final_repeat.append(compound)
            else:
                uniqueList.append(compound[0]) 
                #final_unique.append(compound)
                

#print(uniqueList)
#print(final_unique)
#print("\n")
print(repeatList)
print(final_repeat)
#print(RemovedDuplicateDict)
print('done')


checking target ENSG00000184292...
checking target ENSG00000198900...
checking target ENSG00000120217...
['60700', '24360', '10460355', '10452851', '72403', '131182', '45261084', '72402', '22912790', '442674', '138377597', '155529673', '153370141', '104842', '153370123', '117941887', '117951478', '117941742', '91663303', '138632287', '138454801', '154573771', '117941658', '118434619', '118434635', '162650815', '162679299', '91971328', '118434667', '118435037', '117941533', '117941458', '121452777', '117941618', '117942027', '135148063', '135147942', '135147797', '135148050', '135178395', '135147882', '135147850', '135148087', '135148149', '146234758', '146234732', '134165165', '134164909', '134164888', '134164951', '134164954', '134165012', '134165051', '134164902', '134164921', '134164934', '134165087', '134165088', '134165050', '134165049', '139529829', '139529750', '139529795', '139429498', '139430381', '139430373', '139429489', '139429425', '139430287', '139429472', '139429473', '1