<a href="https://colab.research.google.com/github/mahopman/IEMB-Net/blob/main/clinical_trials/download_clinical_trials.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [42]:
# Root directory for this notebook's data; output_path is built from it.
local_path = '/IEBM-Net_Data'

In [43]:
# Directory for the clinical-trials output; nctid2intervention.json is written here.
output_path = f'{local_path}/clinical_trials'

In [44]:
import os

# Create the output directory together with any missing parents; an already
# existing directory is not an error. Done in Python rather than through a
# shell escape so the path is never subject to shell word-splitting.
os.makedirs(output_path, exist_ok=True)

In [45]:
def extract_nctid(study):
    """Return the NCT identifier stored in a study record.

    The value is read from
    ``study['protocolSection']['identificationModule']['nctId']``; a key
    missing at any level propagates as ``KeyError``.
    """
    identification = study['protocolSection']['identificationModule']
    return identification['nctId']

In [46]:
def extract_interventions(study):
    """Return the distinct intervention types listed for a study.

    Args:
        study: A study record; the interventions are read from
            ``study['protocolSection']['armsInterventionsModule']['interventions']``.

    Returns:
        A list of the unique ``type`` values of the interventions, in order of
        first appearance, or ``None`` when the interventions path is missing
        from the record.

    Raises:
        KeyError: If an individual intervention entry has no ``'type'`` key.
    """
    try:
        interventions = study['protocolSection']['armsInterventionsModule']['interventions']
    except (KeyError, TypeError):
        # Only a missing or non-subscriptable path means "no interventions";
        # anything else (e.g. KeyboardInterrupt) must not be swallowed.
        return None
    # dict.fromkeys de-duplicates while keeping first-seen order, so the result
    # is reproducible between runs (iteration order of a set of strings is not).
    return list(dict.fromkeys(intervention['type'] for intervention in interventions))

In [47]:
from posixpath import supports_unicode_filenames
import requests

def get_clinical_trials(params, timeout=30):
    """Request one page of studies from the ClinicalTrials.gov v2 API.

    Args:
        params: Query parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The decoded JSON body when the server answers with HTTP 200;
        otherwise the status code and response text are printed and ``None``
        is returned.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the timeout expires.
    """
    base_url = "https://clinicaltrials.gov/api/v2/studies"

    response = requests.get(base_url, params=params, timeout=timeout)

    if response.status_code == 200:
        return response.json()

    print(f"Error: {response.status_code} - {response.text}")
    return None

In [50]:
import json

# Maps each study's NCT id to its list of intervention types; studies without
# an interventions section are left out.
nctid2intervention = {}

params = {
    'fields': 'NCTId,InterventionType',  # Fields to retrieve
    'pageSize': 1000,  # Number of results per page
    'format': 'json'  # Response format
}

# Walk through every result page. The first request carries no page token;
# each later request sends the 'nextPageToken' from the previous response,
# and the loop ends once a response no longer contains one.
next_page = True
while next_page:
    data = get_clinical_trials(params)
    if data is None:
        # get_clinical_trials has already printed the HTTP error. Stop with a
        # clear message instead of failing below with an opaque TypeError.
        raise RuntimeError(
            f"ClinicalTrials.gov request failed after {len(nctid2intervention)} "
            "studies were collected; see the error printed above."
        )

    studies = data['studies']

    for study in studies:
        nctid = extract_nctid(study)
        interventions = extract_interventions(study)
        if interventions is not None:
            nctid2intervention[nctid] = interventions

    next_page = 'nextPageToken' in data
    if next_page:
        params['pageToken'] = data['nextPageToken']

# Persist the complete mapping only after every page was fetched successfully.
with open(f'{output_path}/nctid2intervention.json', 'w') as f:
    json.dump(nctid2intervention, f)