In [6]:
# 📦 0. Shared constants — copied verbatim from /api/trial-meta/route.ts
# --------------------------------------------------------------------
import requests
from typing import List, Dict

# Studies endpoint with the requested field list baked into the query string.
CT_BASE_URL = "https://clinicaltrials.gov/api/v2/studies?fields=nctId,phase,fundedBy,overallStatus"

# Number of IDs sent per request (original note: CT.gov caps at 100 IDs / request).
MAX_BATCH = 100

In [14]:
import requests
from typing import List, Dict

# Root of the studies endpoint; query parameters are passed separately per request.
BASE = "https://clinicaltrials.gov/api/v2/studies"

# IDs sent per request (original note: practically safe for URL length; adjust if needed).
MAX_BATCH = 500

# Comma-separated field paths requested from the API. The output keys built
# from them stay the same as before, so the UI is unaffected.
KEEP_FIELDS = ",".join([
    "protocolSection.identificationModule.nctId",
    "protocolSection.identificationModule.briefTitle",  # newly added: trial title
    "protocolSection.designModule.phases",
    "protocolSection.sponsorCollaboratorsModule.leadSponsor.class",
    "protocolSection.statusModule.overallStatus",
])

def _parse_study(study: Dict) -> Dict:
    """Flatten one study record from the API into the compact metadata dict."""
    psec = study["protocolSection"]
    ident = psec["identificationModule"]
    phases = psec.get("designModule", {}).get("phases") or ["NA"]
    # Lowest digit 1-4 that appears in any phase label; "NA" when none does
    # (including when the study carries no phase list at all).
    phase = next((d for d in "1234" if any(d in ph for ph in phases)), "NA")

    return {
        "nctId": ident["nctId"],
        "title": ident.get("briefTitle"),
        "phase": phase,
        "fundedBy": (
            psec.get("sponsorCollaboratorsModule", {})
                .get("leadSponsor", {})
                .get("class")
        ),
        "overallStatus": psec.get("statusModule", {}).get("overallStatus"),
    }


def fetch_trial_meta(nct_ids: List[str], max_batch: int = MAX_BATCH) -> List[Dict]:
    """Fetch compact metadata for the given NCT IDs.

    The IDs are sent to the studies endpoint in chunks of at most
    ``max_batch``; every page of every chunk is collected.

    Args:
        nct_ids: NCT identifiers to look up. An empty list yields ``[]``
            without any request being made.
        max_batch: Maximum number of IDs per request. Must be >= 1.

    Returns:
        A list of dicts ``{nctId, title, phase, fundedBy, overallStatus}``.
        ``title``, ``fundedBy`` and ``overallStatus`` are ``None`` when the
        API omits them; ``phase`` is one of ``"1"``-``"4"`` or ``"NA"``.
        Records appear in the order the API returns them, which is not
        necessarily the order of ``nct_ids``; IDs the API does not return
        are simply absent.

    Raises:
        ValueError: if ``max_batch`` is smaller than 1.
        requests.HTTPError: if the API answers with an error status.
        KeyError: if a returned study lacks its identification section.
    """
    # range() rejects a zero step on its own, but a negative step would yield
    # an empty range and silently drop every ID — fail loudly for both.
    if max_batch < 1:
        raise ValueError("max_batch must be a positive integer")

    out: List[Dict] = []

    # One session for all batches so the underlying connection is reused.
    with requests.Session() as session:
        for start in range(0, len(nct_ids), max_batch):
            batch = nct_ids[start:start + max_batch]

            params = {
                "filter.ids": ",".join(batch),
                "fields": KEEP_FIELDS,
                "format": "json",
                "pageSize": len(batch),   # one page per batch in the common case
            }

            while True:
                r = session.get(BASE, params=params, timeout=20,
                                headers={"accept": "application/json"})
                r.raise_for_status()
                payload = r.json()

                out.extend(_parse_study(s) for s in payload.get("studies", []))

                # If the server split the result anyway, keep following the
                # continuation token instead of dropping the remaining studies.
                next_token = payload.get("nextPageToken")
                if not next_token:
                    break
                params["pageToken"] = next_token

    return out


In [15]:
# Quick manual check of fetch_trial_meta against two trial IDs (hits the live API).
sample_ids = ["NCT04379570", "NCT01772004"]
print(fetch_trial_meta(nct_ids=sample_ids))


[{'nctId': 'NCT01772004', 'title': 'Avelumab in Metastatic or Locally Advanced Solid Tumors (JAVELIN Solid Tumor)', 'phase': '1', 'fundedBy': 'INDUSTRY', 'overallStatus': 'COMPLETED'}, {'nctId': 'NCT04379570', 'title': 'Additional Support Program Via Text Messaging and Telephone-Based Counseling for Breast Cancer Patients Receiving Hormonal Therapy', 'phase': '3', 'fundedBy': 'OTHER', 'overallStatus': 'RECRUITING'}]
