In [1]:
import csv
import json
import os

import requests

In [2]:
# Output file location + filename.
# Built from the current user's home directory instead of a hardcoded
# /Users/<name> prefix, so the notebook runs unchanged on other accounts.
out_file_dir = os.path.join(
    os.path.expanduser('~'), 'Desktop', 'breast_carcinoma_known_drugs.tsv'
)

In [3]:

# Open Targets Platform GraphQL endpoint (API v4); fetch_data() POSTs every
# request to this address.
url = 'https://api.platform.opentargets.org/api/v4/graphql'

In [4]:

# GraphQL query that fetches the known drugs of one disease.
# The disease is not hard-coded in the query text: it is passed in through the
# required $efoId variable (this notebook supplies "MONDO_0007254", breast
# carcinoma). $cursor selects the page to return, $size is the page size
# (defaults to 100 when omitted) and $freeTextQuery is an optional filter that
# this notebook never sets.
# For every row the query selects phase, status, reference URLs, the disease,
# the drug together with its mechanisms of action and their target ids, the
# drug type, the row-level mechanism of action and the row-level target.
query = """
query KnownDrugsQuery(
  $efoId: String!
  $cursor: String
  $freeTextQuery: String
  $size: Int = 100
) {
  disease(efoId: $efoId) {
    id
    knownDrugs(cursor: $cursor, freeTextQuery: $freeTextQuery, size: $size) {
      count
      cursor
      rows {
        phase
        status
        urls {
          name
          url
        }
        disease {
          id
          name
        }
        drug {
          id
          name
          mechanismsOfAction {
            rows {
              actionType
              targets {
                id
              }
            }
          }
        }
        drugType
        mechanismOfAction
        target {
          id
          approvedName
          approvedSymbol
        }
      }
    }
  }
}
"""

In [5]:

# Starting values for the query variables: which disease to look up, where to
# begin paging, and how many rows to request per page.
variables = dict(
    efoId="MONDO_0007254",
    cursor=None,  # no cursor yet, so paging starts from the beginning
    size=100,  # adjust as needed; the maximum depends on the API limit
)

In [6]:

# Function to send the request and get data
def fetch_data(variables, timeout=30):
    """POST the GraphQL `query` to `url` and return the decoded JSON body.

    Args:
        variables: Dict of GraphQL variables sent alongside the module-level
            `query` string.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests` waits indefinitely, so a stalled connection
            would hang the notebook.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within `timeout`.
    """
    response = requests.post(
        url,
        json={'query': query, 'variables': variables},
        timeout=timeout,
    )
    response.raise_for_status()  # Raise an error for bad status codes
    return response.json()

# Initialize the list to hold all rows
all_rows = []

# Page through the results with a private copy of the query variables. The
# shared `variables` dict is left untouched, so re-running this cell after an
# interrupted run starts again from the configured cursor instead of silently
# resuming from a stale mid-run one.
page_variables = dict(variables)

# Pagination loop
while True:
    data = fetch_data(page_variables)

    # Walk down data -> disease -> knownDrugs defensively: a GraphQL response
    # may carry null at any of these levels (e.g. an unknown disease id), and
    # subscripting None would crash instead of reporting the problem.
    payload = data.get('data') if isinstance(data, dict) else None
    disease = payload.get('disease') if isinstance(payload, dict) else None
    known_drugs = disease.get('knownDrugs') if isinstance(disease, dict) else None

    if not isinstance(known_drugs, dict):
        print("Error: Unexpected response structure")
        break

    rows = known_drugs.get('rows') or []
    all_rows.extend(rows)

    # Stop when there is no cursor for a further page. An empty page also ends
    # the loop, so a cursor that keeps coming back without rows cannot spin
    # forever.
    next_cursor = known_drugs.get('cursor')
    if not next_cursor or not rows:
        break

    # Update the cursor for the next page
    page_variables['cursor'] = next_cursor

# Header row of the TSV: one label per field, in the order the fields are
# written for each record.
columns = [
    'Phase',
    'Status',
    'URL Name',
    'URL',
    'Disease ID',
    'Disease Name',
    'Drug ID',
    'Drug Name',
    'Mechanism of Action',
    'Action Type',
    'Target ID',
    'Target Approved Name',
    'Target Approved Symbol',
    'Drug Type',
]

In [7]:

# Write to TSV
# Each known-drug row is expanded into one TSV line per (mechanism of action,
# target) pair of its drug; rows whose drug lists no mechanism targets produce
# no line at all.
# NOTE(review): 'Target ID' comes from the drug's mechanisms of action while
# the approved name/symbol come from the row's own target, so the two may not
# describe the same target -- confirm this is intended.
# UTF-8 is set explicitly so the file does not depend on the platform's default
# encoding (names may contain non-ASCII characters).
with open(out_file_dir, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file, delimiter='\t')
    writer.writerow(columns)
    for row in all_rows:
        drug = row['drug']
        row_target = row['target']
        # Only the first reference URL is exported.
        first_url = row['urls'][0] if row['urls'] else None

        # Values that are identical for every line produced from this row.
        leading_fields = [
            row['phase'],
            row['status'],
            first_url['name'] if first_url else '',
            first_url['url'] if first_url else '',
            row['disease']['id'],
            row['disease']['name'],
            drug['id'],
            drug['name'],
            row['mechanismOfAction'],
        ]
        trailing_fields = [
            row_target['approvedName'] if row_target else '',
            row_target['approvedSymbol'] if row_target else '',
            row['drugType'],
        ]

        # A null mechanismsOfAction / rows / targets is treated as "no
        # entries" rather than raising on a None subscript.
        mechanisms = (drug['mechanismsOfAction'] or {}).get('rows') or []
        for moa in mechanisms:
            for moa_target in moa['targets'] or []:
                writer.writerow(
                    leading_fields
                    + [moa['actionType'], moa_target['id']]
                    + trailing_fields
                )
