In [1]:
#!/usr/bin/env python3

# Import relevant libraries to make HTTP requests and parse JSON response
import requests
import json
import csv
import io

# Set disease_id variable for triple-negative breast cancer
disease_id = "EFO_0005537"

# Build query string to get target information as well as count
query_string = """
query KnownDrugs {
  disease(efoId: "EFO_0005537") {
    id
    name
		associatedTargets (page: {size: 3, index: 0}) {
      rows{
        score
        target{
          id
          approvedName
          approvedSymbol
          knownDrugs{
            rows{
              drug{
                name
                id
              }
            }
          }
        }
      }
    }
}
}
"""

# Set variables object of arguments to be passed to endpoint
variables = {"efoId": disease_id}

# Set base URL of GraphQL API endpoint
base_url = "https://api.platform.opentargets.org/api/v4/graphql"

# Perform POST request and check status code of response
r = requests.post(base_url, json={"query": query_string, "variables": variables})
print(r.status_code)

#Transform API response from JSON into Python dictionary and print in console
api_response = json.loads(r.text)
print('done')
#print(api_response)

200
done


In [2]:
a=0
#create list of target IDs by calling them from dictionary
target_ids = api_response['data']['disease']['associatedTargets']['rows']
#print(target_ids)
target_id_list = []
#pull IDs from dictionary and add to new list
while a < len(target_ids):
    target_id_list.append(target_ids[a]['target'].get('id'))
    a+=1
print(target_id_list)


['ENSG00000184292', 'ENSG00000198900', 'ENSG00000120217']


In [22]:
#Create new dictionary assigning assays to to each target, and assigning compounds to each assay using PUG REST
# Able to write dictionary at a speed of 4 targets/second

a=0
target_id_assay_dict = {}
for target_id in target_id_list:
# get raw AIDs (Assay IDs) for each Ensemble ID in JSON format using PUG REST
    r = requests.get('https://pubchem.ncbi.nlm.nih.gov/rest/pug/gene/synonym/Ensemble:' + target_id + '/aids/json')
# convert JSON to Python Dictionary
    raw_aids = r.json()
    #print(type(raw_aids))
# isolate raw_aids into a list containing just the AIDs from the dictionary
    try:
        aids = raw_aids['InformationList']['Information'][0]['AID']
    except:
        pass
# assign each Ensamble Target ID to dictionary entry with associated Assay IDs
    #target_id_assay_dict[id] =  aids
        
    #create sub-dictionary for compounds within each assay
    target_id_assay_dict[target_id] =  {}
    for aid in aids:
        aid = str(aid)
        print(aid)
        #get raw CIDS (Compound IDs) for each Assay in JSON format using PUG REST
        r = requests.get('https://pubchem.ncbi.nlm.nih.gov/assay/pcget.cgi?query=download&record_type=datatable&actvty=active&response_type=display&aid=' + aid)

        loose_data = r.text

        listed_data = loose_data.split(',')
        try:   
            cid_index = listed_data.index("PUBCHEM_CID")
        except:
            continue
        #print(cid_index)
        #print(type(cid_index))

        activity_index = listed_data.index("PUBCHEM_ACTIVITY_SCORE")
        #print(activity_index)
        #print(type(activity_index))


        fhand = io.StringIO(loose_data)

        cids_with_activities = []
        
        for line in fhand:
            line = line.split(',')
    
            while ('' in line):
                line.remove('')
            try:
                int(line[0])
                int(line[cid_index])
                float(line[activity_index])
            except:
                line = ""
                continue
            cid_with_activity = (line[cid_index], line[activity_index])
            cids_with_activities.append(cid_with_activity)
        #isolate raw_cids into a list containing just the CIDs from the dictionary
        if len(cids_with_activities) > 0:
            #assign CIDs to associated AID dictionary entry
            target_id_assay_dict[target_id][aid] = cids_with_activities
            print("Compounds found! Adding compounds to dictionary and passing to next assay.")
        else:
            print("No compounds found for assay. Passing to next assay.")
            continue
        
    a+=1
    display(a)
print(target_id_assay_dict)

















1904
No compounds found for assay. Passing to next assay.
624099
No compounds found for assay. Passing to next assay.
651810
No compounds found for assay. Passing to next assay.
651811
No compounds found for assay. Passing to next assay.
743121
No compounds found for assay. Passing to next assay.
743454
No compounds found for assay. Passing to next assay.
1117281
No compounds found for assay. Passing to next assay.
1117357
1159506
1159578
No compounds found for assay. Passing to next assay.
1159584
No compounds found for assay. Passing to next assay.
1224830
No compounds found for assay. Passing to next assay.
1346172
No compounds found for assay. Passing to next assay.
1508588
No compounds found for assay. Passing to next assay.


1

1904
No compounds found for assay. Passing to next assay.
56396
56397
56398
56399
56400
56401
56402
56403
56404
56405
56406
56407
56408
56409
56410
56411
56412
56413
56414
56415
56416
56417
56418
56419
56420
56541
56542
56543
56544
56545
56546
56547
56548
56549
56550
56551
56552
56553
56554
56555
56556
56557
56558
56559
56560
56561
compounds found! Adding compounds to dictionary and passing to next assay.
56562
compounds found! Adding compounds to dictionary and passing to next assay.
56563
compounds found! Adding compounds to dictionary and passing to next assay.
56564
56565
56566
compounds found! Adding compounds to dictionary and passing to next assay.
56567
compounds found! Adding compounds to dictionary and passing to next assay.
56568
56569
56570
compounds found! Adding compounds to dictionary and passing to next assay.
56571
compounds found! Adding compounds to dictionary and passing to next assay.
56572
56573
compounds found! Adding compounds to dictionary and passing to next a

2

1904
No compounds found for assay. Passing to next assay.
624099
No compounds found for assay. Passing to next assay.
651810
No compounds found for assay. Passing to next assay.
651811
No compounds found for assay. Passing to next assay.
743121
No compounds found for assay. Passing to next assay.
743454
No compounds found for assay. Passing to next assay.
1117281
No compounds found for assay. Passing to next assay.
1117357
1159506
1159578
No compounds found for assay. Passing to next assay.
1159584
No compounds found for assay. Passing to next assay.
1224830
No compounds found for assay. Passing to next assay.
1229064
compounds found! Adding compounds to dictionary and passing to next assay.
1416967
compounds found! Adding compounds to dictionary and passing to next assay.
1449577
compounds found! Adding compounds to dictionary and passing to next assay.
1449578
compounds found! Adding compounds to dictionary and passing to next assay.
1449579
No compounds found for assay. Passing to n

3

{'ENSG00000184292': {}, 'ENSG00000198900': {'56561': [('10460355', '0.028'), ('10452851', '0.143')], '56562': [('24360', '0.82'), ('14761142', '3.4'), ('14761139', '0.4'), ('14761137', '1'), ('14761134', '0.1'), ('14761123', '0.1'), ('72402', '0.9'), ('10133241', '0.9'), ('14761129', '0.3'), ('14680467', '10'), ('97226', '1'), ('14761147', '2'), ('14680465', '0.5'), ('14761132', '0.9'), ('14761127', '0.3'), ('14761145', '1.7'), ('14761114', '0.8'), ('14761120', '0.8'), ('60700', '3.2'), ('14761164', '0.4'), ('14761163', '2'), ('14761111', '1'), ('14761117', '2.2'), ('14761156', '3'), ('14761155', '0.5'), ('14761107', '0.5'), ('14761104', '0.3'), ('14761109', '0.6'), ('104842', '0.8')], '56563': [('24360', '0.8'), ('14442798', '5'), ('23705698', '0.9')], '56566': [('10745521', '0.34'), ('10769272', '0.6'), ('10768720', '0.52'), ('10814294', '0.16'), ('11800135', '0.14'), ('10816483', '0.1'), ('9982858', '0.22'), ('10506494', '0.25'), ('10577898', '0.19'), ('10768284', '0.21')], '56567':

In [143]:
theDict = target_id_assay_dict

In [144]:
#Curate new dictionary by removing compounds that have been used in multiple assays

uniqueList = []
repeatList = []
RemovedDuplicateDict = theDict

for target in theDict:
    a=0
    print("checking target " + target + "...")
    for assay in theDict[target]:
        for compound in theDict[target][assay]:
            if compound in uniqueList:
                RemovedDuplicateDict[target][assay].remove(compound)
                if compound not in repeatList:
                    repeatList.append(compound)
            else:
                uniqueList.append(compound)   
                

#print(uniqueList)
#print("\n")
#print(repeatList)
#print(RemovedDuplicateDict)
print('done')


checking target ENSG00000184292...
checking target ENSG00000198900...
checking target ENSG00000120217...
done


In [145]:
r = requests.get('https://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/736/sids/JSON?sids_type=active')
raw_736 = r.json()
#print(raw_736)

active_736 = raw_736['InformationList']['Information'][0]['SID']
#print(active_736)

active_736_string = str(active_736).replace(" ", "")
active_736_formatted = active_736_string[1:len(active_736_string)-1]

print(active_736_formatted)

r = requests.get('https://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/' + str(736) + '/concise/CSV?sid=' + active_736_formatted)
print(r)
dose_response = r.json()
print(dose_response)

842204,843683,844782,846220,846490,847305,847359,848536,849093,851321,851380,852129,853296,856169,856175,856251,857318,857384,857386,857655,858283,858389,858513,858611,858637,859929,860619,861006,861727,861921,861945,861951,862684,862725,862844,863470,863640,865492,865665,865967,3711988,3712485,3713127,3714359,3716390,3717602,3717900,4240514,4241470,4242524,4244491,4244974,4247903,4248044,4249480,4251027,4251553,4254596,4255956,4257509,4259339,4261894,4263033,4265474,4265620,7966015,7966305,7969122,7970050,7972728,7978390,14718880,14720488,14720511,14720802,14720867,14721138,14721552,14722452,14723818,14723875,14724052,14724089,14725446,14725477,14725485,14725635,14726390,14726526,14727019,14727532,14727720,14727857,14728264,14728373,14728984,14729027,14729028,14729085,14729194,14729216,14729217,14729238,14729239,14729820,14729821,14729838,14730083,14730495,14730674,14731604,14732895,14733125,14733131,14733523,14733800,14734027,14734928,14736730,14737139,14738431,14738596,14738976,1473

JSONDecodeError: Extra data: line 1 column 6 (char 5)

<class 'str'>


In [14]:
r = requests.get('https://pubchem.ncbi.nlm.nih.gov/assay/pcget.cgi?query=download&record_type=datatable&actvty=active&response_type=display&aid=1811724')
loose_data = r.text


listed_data = loose_data.split(',')

cid_index = listed_data.index("PUBCHEM_CID")
#print(cid_index)
#print(type(cid_index))

activity_index = listed_data.index("PUBCHEM_ACTIVITY_SCORE")
#print(activity_index)
#print(type(activity_index))

fhand = io.StringIO(loose_data)

cids_with_activities = []

for line in fhand:
    line = line.split(',')
    
    while ('' in line):
        line.remove('')
    try:
        int(line[0])
    except:
        line = ""
        continue
    cid_with_activity = (line[cid_index], line[activity_index])
    cids_with_activities.append(cid_with_activity)
print(cids_with_activities)

[('117951478', '0.0675'), ('150469088', '0.0118'), ('166625340', '0.0078'), ('153532996', '0.0179'), ('166626037', '0.013'), ('166626145', '0.0556'), ('156807419', '0.0027'), ('166626497', '0.00359'), ('153533003', '0.0564'), ('166626654', '0.0452'), ('153532989', '0.0712'), ('166626825', '0.0148'), ('166626832', '0.0227'), ('166627038', '0.0267'), ('149610749', '0.0921'), ('153532988', '0.0121'), ('166628251', '0.00811'), ('166628702', '0.0155'), ('166628766', '0.0124'), ('153533002', '0.00477'), ('153532985', '0.0494'), ('156807421', '0.0618'), ('156807422', '0.0277'), ('166629498', '0.0178'), ('149687113', '0.0874'), ('166630179', '0.00565'), ('153533001', '0.0377'), ('149728083', '0.0467'), ('150544999', '0.0102'), ('166631838', '0.00801'), ('166632237', '0.0732'), ('153532993', '0.0209'), ('153532990', '0.00998'), ('151450990', '0.00744'), ('166632690', '0.0127'), ('166632769', '0.061'), ('150565018', '0.00878'), ('166632937', '0.0214'), ('149687313', '0.00891'), ('156807418', '0.