In [1]:
# Import relevant libraries to make HTTP requests and parse JSON response
import requests
import json
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Disease to look up, given as an EFO identifier
efoId = "EFO_0000222"  # acute myeloid leukemia

# GraphQL query: for the given disease, fetch the associated targets
# (id, approved symbol, protein ids with their source) together with the
# overall association score and the per-datasource scores.
# NOTE: the page size in the query (10000) must stay in sync with the count
# check at the end of this cell.
query_string = """
  query targetsAssociatedWithDisease($efoId: String!){
    disease(efoId: $efoId){
      id
      name
      associatedTargets (page: {size: 10000, index: 0}){
        count
        rows{
          target{
            id
            approvedSymbol
            proteinIds{
              id
              source
            }
          }
          score
          datasourceScores {
            id
            score
          }
        }
      }
    }
  }
"""

# Set variables object of arguments to be passed to endpoint
variables = {"efoId": efoId}

# Set base URL of GraphQL API endpoint
base_url = "https://api.platform.opentargets.org/api/v4/graphql"

# Perform POST request. The timeout makes a stalled connection fail instead of
# hanging the kernel forever; raise_for_status() turns HTTP 4xx/5xx responses
# into an exception that carries the status code and URL.
r = requests.post(
    base_url,
    json={"query": query_string, "variables": variables},
    timeout=60,
)
r.raise_for_status()

# Transform API response from JSON into a Python dictionary
api_response = r.json()

# A GraphQL server can report query-level failures in an "errors" list even
# when the HTTP request itself succeeded, so check it explicitly.
if api_response.get("errors"):
    raise RuntimeError(f"GraphQL query failed: {api_response['errors']}")

# Fail here with a clear message rather than with a TypeError/KeyError in a
# later cell when the response carries no disease object.
if not (api_response.get("data") or {}).get("disease"):
    raise ValueError(f"No disease returned for efoId {efoId!r}")

# All associations must fit in the single page requested above
assert api_response['data']['disease']['associatedTargets']['count'] <= 10000, "Error: should set a higher page size"

In [3]:
# Report how many association rows came back for the disease
n_target_hits = len(api_response['data']['disease']['associatedTargets']['rows'])
print(f"Target hits: {n_target_hits}")

Target hits: 8311


In [4]:
# Flatten every association row into one flat record, keyed by the row's
# position in the API response (the next cell turns this into a DataFrame).
targets = api_response['data']['disease']['associatedTargets']['rows']
targets_dict = {}
for idx, each_target in enumerate(targets):
    record = {
        'symbol': each_target['target']['approvedSymbol'],
        'overall_score': each_target['score'],
    }

    # One "<datasource id>_score" entry per datasource listed for this target
    for each_datasource in each_target['datasourceScores']:
        record[f"{each_datasource['id']}_score"] = each_datasource['score']

    # Keep only the protein ids whose source is 'uniprot_swissprot'.
    # A plain list filter replaces building a DataFrame per target, which was
    # costly when repeated for thousands of rows; `or []` covers a null list.
    protein_ids = each_target['target']['proteinIds'] or []
    swissprot_ids = [
        protein['id']
        for protein in protein_ids
        if protein.get('source') == 'uniprot_swissprot'
    ]

    # Stored as the string form of the id list (e.g. "['X']"), or the
    # "Not found" marker when no matching id exists.
    record['uniprot_id'] = str(swissprot_ids) if swissprot_ids else "Not found"

    targets_dict[idx] = record

In [5]:
# Convert the {row index: record} dictionary to a dataframe, one row per target.
# from_dict(orient="index") infers a dtype per column, whereas
# pd.DataFrame(targets_dict).T leaves every column (scores included) as object.
targets_df = pd.DataFrame.from_dict(targets_dict, orient="index")

main_colname = ['symbol', 'overall_score', 'uniprot_id']        # put important columns first
# Everything else (the per-datasource score columns) follows, in existing order
rest_of_colname = targets_df.columns.drop(main_colname).tolist()
targets_df = targets_df[main_colname + rest_of_colname]

# Highest overall score first; reassign rather than sort in place so that
# re-running this cell always starts from a freshly built frame.
targets_df = targets_df.sort_values(by='overall_score', ascending=False)

In [6]:
# Write the ranked target table to disk; the row index is not exported
output_csv_path = 'target_associated_with_AML.csv'
targets_df.to_csv(output_csv_path, index=False)