In [2]:
import requests
import pandas as pd

In [None]:
BASE_URL = "https://clinicaltrials.gov/api/v2/studies"

def fetch_trials_v2(condition, country=None, status="RECRUITING", max_records=50):
    params = {
        "query.cond": condition,          
        "fields": "NCTId,BriefTitle,OverallStatus,Phase,LocationCountry,LeadSponsorName",
        "pageSize": max_records,
        "countTotal": "true",
    }
    if country:
        params["query.locn"] = country
    if status:
        params["filter.overallStatus"] = status.upper()  

    r = requests.get(BASE_URL, params=params)
    r.raise_for_status()
    data = r.json()   

    studies = data.get("studies", [])
    rows = []
    for s in studies:
        proto = s.get("protocolSection", {})
        ident = proto.get("identificationModule", {})
        status_mod = proto.get("statusModule", {})
        sponsor_mod = proto.get("sponsorCollaboratorsModule", {})
        loc_mod = proto.get("contactsLocationsModule", {})

        rows.append({
            "NCTId": ident.get("nctId"),
            "BriefTitle": ident.get("briefTitle"),
            "OverallStatus": status_mod.get("overallStatus"),
            "Phase": status_mod.get("phase"),
            "LeadSponsorName": (sponsor_mod.get("leadSponsor") or {}).get("name"),
            "LocationCountry": ",".join(
                sorted({loc.get("country") for loc in loc_mod.get("locations", []) if loc.get("country")})
            ) or None,
        })

    return pd.DataFrame(rows)

def build_sponsor_profiles(df):
    if df.empty:
        return []
    grouped = (df.groupby("LeadSponsorName")
        .agg(
            n_trials=("NCTId", "nunique"),
            phases=("Phase", lambda x: ", ".join(sorted(set(x.dropna())))),
            countries=("LocationCountry", lambda x: ", ".join(sorted(set(x.dropna())))),
        )
        .reset_index()
        .rename(columns={"LeadSponsorName": "sponsor"})
    )
    return grouped.to_dict(orient="records")

def build_phase_distribution(df):
    if df.empty:
        return []
    counts = (df["Phase"]
              .fillna("Unknown")
              .value_counts()
              .reset_index())
    counts.columns = ["phase", "n_trials"]
    counts["percent"] = counts["n_trials"] / counts["n_trials"].sum() * 100
    return counts.to_dict(orient="records")


def clinical_trials_worker(payload):
    """
    payload should contain: condition, country, status, max_records
    This function returns structured data for Main Agent.
    """
    condition = payload.get("condition")
    country   = payload.get("country")
    status    = payload.get("status", "Recruiting")
    max_rec   = payload.get("max_records", 200)

    df = fetch_trials_v2(condition, country, status, max_rec)

    result = {
        "active_trials": df.to_dict(orient="records"),
        "sponsor_profiles": build_sponsor_profiles(df),
        "phase_distribution": build_phase_distribution(df),
    }

    return result

In [None]:
payload = {
    "condition": "Diabetes Mellitus, Type 2",  \
    "country": "Brazil",
    "status": "RECRUITING", 
    "max_records": 5
}
clinical_trials_worker(payload)

{'active_trials': [{'NCTId': 'NCT05598203',
   'BriefTitle': 'Effect of Nutrition Education Groups in the Treatment of Patients with Type 2 Diabetes',
   'OverallStatus': 'RECRUITING',
   'Phase': None,
   'LeadSponsorName': 'Hospital de Clinicas de Porto Alegre',
   'LocationCountry': 'Brazil'},
  {'NCTId': 'NCT05348733',
   'BriefTitle': 'A Study Called FINE-REAL to Learn More About the Use of the Drug Finerenone in a Routine Medical Care Setting',
   'OverallStatus': 'RECRUITING',
   'Phase': None,
   'LeadSponsorName': 'Bayer',
   'LocationCountry': 'Argentina,Belgium,Brazil,Canada,China,Denmark,Germany,Greece,Mexico,Netherlands,Portugal,Russia,Saudi Arabia,Singapore,Slovenia,South Korea,Switzerland,Taiwan,Thailand,United States'},
  {'NCTId': 'NCT07064473',
   'BriefTitle': 'A Study to Test Vicadrostat (BI 690517) Taken Together With Empagliflozin in People With Type 2 Diabetes, High Blood Pressure, and Cardiovascular Disease',
   'OverallStatus': 'RECRUITING',
   'Phase': None,
 

In [10]:
from typing import Optional, Dict, Any
from langchain_core.tools import tool

@tool
def clinical_trials_tool(
    condition: str,
    country: Optional[str] = None,
    status: str = "RECRUITING",
    max_records: int = 50,
) -> Dict[str, Any]:
    """
    Fetch and summarize clinical trial data from ClinicalTrials.gov v2.

    Returns:
      - active_trials
      - sponsor_profiles
      - phase_distribution
    """
    payload = {
        "condition": condition,
        "country": country,
        "status": status,
        "max_records": max_records,
    }
    return clinical_trials_worker(payload)

In [16]:
payload = {
    "condition": "Diabetes Mellitus, Type 2",  # Combined condition
    "country": "Brazil",
    "status": "RECRUITING", 
    "max_records": 5
}
result = clinical_trials_tool.invoke(payload)
print(result)

{'active_trials': [{'NCTId': 'NCT06735859', 'BriefTitle': 'Eating Window and Sleep Disorders on Glycemic Control, Cardiovascular Risk, and Weight Loss', 'OverallStatus': 'RECRUITING', 'Phase': None, 'LeadSponsorName': 'Federal University of São Paulo', 'LocationCountry': 'Brazil'}, {'NCTId': 'NCT03549754', 'BriefTitle': 'iCaReMe Global Registry', 'OverallStatus': 'RECRUITING', 'Phase': None, 'LeadSponsorName': 'AstraZeneca', 'LocationCountry': 'Brazil,Chile,Côte d’Ivoire,Hong Kong,India,Iraq,Kazakhstan,Kenya,Malaysia,Mexico,Nigeria,South Africa,Taiwan,Ukraine,United Arab Emirates'}, {'NCTId': 'NCT04874012', 'BriefTitle': 'Taurine Effect on Glycemic, Lipidic and Inflammatory Profile in Individuals With Type 2 Diabetes', 'OverallStatus': 'RECRUITING', 'Phase': None, 'LeadSponsorName': 'Hospital de Clinicas de Porto Alegre', 'LocationCountry': 'Brazil'}, {'NCTId': 'NCT07064473', 'BriefTitle': 'A Study to Test Vicadrostat (BI 690517) Taken Together With Empagliflozin in People With Type 2 