In [1]:
import requests
import pandas as pd

In [2]:
# BASE_URL = "https://clinicaltrials.gov/data-api/api/v1/studies"
# curl -X GET "https://clinicaltrials.gov/api/v2/studies?query.id=BP28248" \
#  -H "accept: application/json" \

BASE_URL = "https://clinicaltrials.gov/api/v2/studies?format=json&query.id="


In [3]:
def fetch_study_info(study_id):
    """
    Fetch study information from ClinicalTrials.gov API using a study ID.
    """
    url = f"{BASE_URL}/{study_id}"
    response = requests.get(url)

    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"Error fetching data: {response.status_code}, {response.text}")

In [None]:
study_info = fetch_study_info("BP28248")
study_info

In [12]:
def format_study_info(study_data):
    """
    Extract relevant fields and format as a DataFrame for tabular output.
    """
    # Example: Extract key fields (adjust based on API response)
    fields = {
        "NCT ID": study_data.get("nctId",""),
        "Study ID": study_data.get("studyId", ""),
        "Title": study_data.get("title", ""),
        "Condition": study_data.get("condition", ""),
        "Status": study_data.get("status", ""),
        "Start Date": study_data.get("startDate", ""),
        "Completion Date": study_data.get("completionDate", "")
    }
    return pd.DataFrame([fields])

In [13]:
study_table = format_study_info(study_info)
study_table

Unnamed: 0,NCT ID,Study ID,Title,Condition,Status,Start Date,Completion Date
0,,,,,,,


In [None]:
#curl -X GET "https://clinicaltrials.gov/api/v2/studies?query.titles=high+blood+pressure" \
#  -H "accept: application/json"

BASE_URL1 = "https://clinicaltrials.gov/api/v2/studies?format=json&query.titles="

def fetch_trials_by_condition(condition):
    """
    Fetch a list of trials from ClinicalTrials.gov API that match a given condition or disease.
    """

    url = f"{BASE_URL1}/{condition}"
    response = requests.get(url)

    if response.status_code == 200:
        data = response.json()
        # Extract relevant details about each trial (e.g., NCT ID and title)
        trials = [
            {"nct_id": trial.get("id"), "title": trial.get("brief_title")}
            for trial in data.get("studies", [])
        ]
        return pd.DataFrame(trials)  # Convert to a DataFrame for easier analysis
    else:
        raise Exception(f"Error fetching data: {response.status_code}, {response.text}")



condition_trials = fetch_trials_by_condition("diabetes")
print("Condition Trials:")
print(condition_trials)

In [8]:
import requests
import pandas as pd

BASE_URL = "https://clinicaltrials.gov/api/v2/studies"

def fetch_study_info(study_id):
    """
    Fetch study information from ClinicalTrials.gov API using a study ID.
    """
    url = f"{BASE_URL}?format=json&query.id={study_id}"
    response = requests.get(url)

    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"Error fetching data: {response.status_code}, {response.text}")
    
study_info = fetch_study_info("BP28248")
print("Study Info:", study_info)

Study Info: {'studies': [{'protocolSection': {'identificationModule': {'nctId': 'NCT01677754', 'orgStudyIdInfo': {'id': 'BP28248'}, 'secondaryIdInfos': [{'id': '2012-000943-29', 'type': 'EUDRACT_NUMBER'}], 'organization': {'fullName': 'Hoffmann-La Roche', 'class': 'INDUSTRY'}, 'briefTitle': 'A Study of RO4602522 in Participants With Moderate Severity Alzheimer Disease on Background Alzheimer Disease Therapy', 'officialTitle': "A Phase II, Multicenter, Randomized, Double-Blind, Parallel-Group, Placebo-Controlled Study to Investigate the Efficacy and Safety of RO4602522 Added to Background Alzheimer's Disease Therapy in Patients With Moderate Severity Alzheimer's Disease", 'acronym': 'MAyflOwer RoAD'}, 'statusModule': {'statusVerifiedDate': '2017-05', 'overallStatus': 'COMPLETED', 'expandedAccessInfo': {'hasExpandedAccess': False}, 'startDateStruct': {'date': '2012-10-24', 'type': 'ACTUAL'}, 'primaryCompletionDateStruct': {'date': '2015-06-12', 'type': 'ACTUAL'}, 'completionDateStruct': 

In [12]:
BASE_URL = "https://clinicaltrials.gov/api/v2/studies"

def fetch_trials_by_condition(condition):
    """
    Fetch a list of trials from ClinicalTrials.gov API that match a given condition or disease.
    """
    url = f"{BASE_URL}?format=json&query.cond={condition}"
    response = requests.get(url)

    if response.status_code == 200:
        data = response.json()

        # Debugging: Print the raw JSON response to understand its structure
        print("API Response:", data)

        # Extract the studies list and handle cases where it's missing or empty
        studies = data.get("studies", [])
        if not studies:
            print("No studies found for the given condition.")
            return pd.DataFrame(columns=["nct_id", "title"])  # Return an empty DataFrame

        # Extract relevant details from the nested structure
        trials = []
        for trial in studies:
            try:
                nct_id = trial["protocolSection"]["identificationModule"]["nctId"]
                title = trial["protocolSection"]["identificationModule"].get("officialTitle", "Title Not Available")
                trials.append({"nct_id": nct_id, "title": title})
            except KeyError as e:
                print(f"Missing expected key in trial: {e}")
        
        return pd.DataFrame(trials)  # Convert to a DataFrame for easier analysis
    else:
        raise Exception(f"Error fetching data: {response.status_code}, {response.text}")

# Example usage
condition_trials = fetch_trials_by_condition("diabetes")
print("Condition Trials:")
print(condition_trials)

API Response: {'studies': [{'protocolSection': {'identificationModule': {'nctId': 'NCT00701831', 'orgStudyIdInfo': {'id': 'LANTU_L_03502'}, 'organization': {'fullName': 'Sanofi', 'class': 'INDUSTRY'}, 'briefTitle': 'Assessment of Insulin Glargine in Type 2 Patients for Good Glycemic Control', 'officialTitle': 'Assessment of Forced Titration to Reach the Effective Dose for Good Glycemic Control in Lantus Treated Type 2 Patients', 'acronym': 'LANTIT'}, 'statusModule': {'statusVerifiedDate': '2010-11', 'overallStatus': 'COMPLETED', 'expandedAccessInfo': {'hasExpandedAccess': False}, 'startDateStruct': {'date': '2008-05'}, 'primaryCompletionDateStruct': {'date': '2010-10', 'type': 'ACTUAL'}, 'completionDateStruct': {'date': '2010-10', 'type': 'ACTUAL'}, 'studyFirstSubmitDate': '2008-06-18', 'studyFirstSubmitQcDate': '2008-06-18', 'studyFirstPostDateStruct': {'date': '2008-06-19', 'type': 'ESTIMATED'}, 'lastUpdateSubmitDate': '2010-11-03', 'lastUpdatePostDateStruct': {'date': '2010-11-04', 

In [None]:
import requests
import pandas as pd

BASE_URL = "https://clinicaltrials.gov/api/v2/studies"

def fetch_trials(**query_params):
    """
    Fetch a list of trials from ClinicalTrials.gov API based on dynamic query parameters.
    
    Args:
        **query_params: Arbitrary keyword arguments representing API query parameters.
            Examples: 
                query_params={'query.cond': 'diabetes', 'query.id': 'BP28248', 'query.titles': 'high blood pressure'}
    
    Returns:
        DataFrame containing NCT IDs and titles of matching trials.
    """
    # Construct query string from query_params
    query_string = "&".join(f"{key}={value}" for key, value in query_params.items())
    url = f"{BASE_URL}?format=json&{query_string}"

    response = requests.get(url)

    if response.status_code == 200:
        data = response.json()

        # Debugging: Print the raw JSON response to understand its structure
        print("API Response:", data)

        # Extract the studies list and handle cases where it's missing or empty
        studies = data.get("studies", [])
        if not studies:
            print("No studies found for the given query.")
            return pd.DataFrame(columns=["nct_id", "title"])  # Return an empty DataFrame

        # Extract relevant details from the nested structure
        trials = []
        for trial in studies:
            try:
                nct_id = trial["protocolSection"]["identificationModule"]["nctId"]
                title = trial["protocolSection"]["identificationModule"].get("officialTitle", "Title Not Available")
                trials.append({"nct_id": nct_id, "title": title})
            except KeyError as e:
                print(f"Missing expected key in trial: {e}")

        return pd.DataFrame(trials)  # Convert to a DataFrame for easier analysis
    else:
        raise Exception(f"Error fetching data: {response.status_code}, {response.text}")




In [14]:
# Example usage
query_parameters = {
    "query.cond": "diabetes",
    "query.titles": "high blood pressure"
    # ,
    # "query.id": "BP28248"
}
result_df = fetch_trials(**query_parameters)
print("Fetched Trials:")
print(result_df)

API Response: {'studies': [{'protocolSection': {'identificationModule': {'nctId': 'NCT03532620', 'orgStudyIdInfo': {'id': 'BZ-1702'}, 'organization': {'fullName': 'First Affiliated Hospital, Sun Yat-Sen University', 'class': 'OTHER'}, 'briefTitle': 'China Protection Trial of Glucose Metabolism by Pitavastatin in Patients With Prediabetes and Hypertension', 'officialTitle': 'A Multi-center, Open-label, Randomized, 12-month, Parallel-group, Non-inferiority Study to Compare the Hemoglobin A1C Metabolism of Pitavastatin Therapy Versus Atorvastatin in Chinese Patients With Prediabetes and Hypertension', 'acronym': 'CAMPUS'}, 'statusModule': {'statusVerifiedDate': '2019-05', 'overallStatus': 'UNKNOWN', 'lastKnownStatus': 'RECRUITING', 'expandedAccessInfo': {'hasExpandedAccess': False}, 'startDateStruct': {'date': '2018-08-09', 'type': 'ACTUAL'}, 'primaryCompletionDateStruct': {'date': '2019-09', 'type': 'ESTIMATED'}, 'completionDateStruct': {'date': '2020-09', 'type': 'ESTIMATED'}, 'studyFir