# TCIA API Quickstart (tcia_utils)

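Setup: install dependencies (tcia_utils) if they are missing.

Steps (numbered to match the sections below):
1. List collections.
2. Filter series (collection, modality=CT, body part).
3. Pick one SeriesInstanceUID from the filtered series.
4. Download that series to `data/tcia_downloads/<SeriesInstanceUID>/`.
5. Summarize downloaded files.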

Notes:
- Many collections are public; some require TCIA login/API key.
- LIDC-IDRI is public and works without auth.

In [None]:
# Install tcia_utils only when it is not already importable (idempotent).
try:
    import tcia_utils  # noqa: F401
except ImportError:
    import importlib
    import subprocess
    import sys

    # Run pip through the kernel's own interpreter so the package is installed
    # into the environment this notebook is actually executing in.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'tcia_utils'])
    # The import system caches directory listings; refresh them so the package
    # installed above can be imported by later cells in this same kernel session.
    importlib.invalidate_caches()

Collecting tcia_utils
  Using cached tcia_utils-3.2.1-py3-none-any.whl.metadata (2.2 kB)
Collecting bs4 (from tcia_utils)
  Using cached bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting pandas (from tcia_utils)
  Using cached pandas-3.0.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (79 kB)
Collecting plotly (from tcia_utils)
  Using cached plotly-6.5.2-py3-none-any.whl.metadata (8.5 kB)
Collecting requests (from tcia_utils)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting tqdm (from tcia_utils)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting unidecode (from tcia_utils)
  Using cached Unidecode-1.4.0-py3-none-any.whl.metadata (13 kB)
Collecting beautifulsoup4 (from bs4->tcia_utils)
  Using cached beautifulsoup4-4.14.3-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>=1.6.1 (from beautifulsoup4->bs4->tcia_utils)
  Using cached soupsieve-2.8.3-py3-none-any.whl.metadata (4.6 kB)
Collecting typ

In [None]:
from tcia_utils import nbia
import os, pprint

## 1) List collections
Shows available collections via NBIA.

In [None]:
# Fetch the collection listing through the NBIA helper module.
# NOTE(review): the published tcia_utils docs appear to use camelCase helpers
# (e.g. nbia.getCollections(format="df")) — confirm that get_collections exists
# in the installed version and returns a DataFrame, since .head() is called on
# the result below.
collections = nbia.get_collections()
print(f'Total collections: {len(collections)}')

# Leave the preview as the cell's last expression so it is rendered as output.
collections_preview = collections.head()
collections_preview

## 2) Filter series (collection, modality, body part)
Using LIDC-IDRI (public lung CT) as example.

In [None]:
# Query filters for the series search, gathered in one place so they are easy
# to adjust.
# NOTE(review): keyword names are passed through unchanged to nbia.get_series;
# confirm they match the installed tcia_utils signature.
collection = 'LIDC-IDRI'
series_filters = {
    'Collection': collection,
    'Modality': 'CT',
    'BodyPartExamined': 'CHEST',
}

series_df = nbia.get_series(**series_filters)
print(f'Series found: {len(series_df)}')
series_df.head()

## 3) Pick one SeriesInstanceUID
Select the first one (customize as needed).

In [None]:
# Take the first matching series; stop the run with a readable message when the
# filters matched nothing.
if not series_df.empty:
    first_series = series_df.iloc[0]
    series_uid = first_series['SeriesInstanceUID']
    print('Selected SeriesInstanceUID:', series_uid)
else:
    raise SystemExit('No series found for the given filters; adjust collection/modality/body part.')

## 4) Download the series
Files will be saved under `data/tcia_downloads/<SeriesInstanceUID>/`.

In [None]:
# Resolve where downloads are stored, in order of precedence:
#   1. the TCIA_DOWNLOAD_DIR environment variable, when set;
#   2. data/tcia_downloads inside the original Codespace checkout, when present;
#   3. data/tcia_downloads under the current working directory otherwise.
# This keeps the original location in the Codespace while letting the notebook
# run on machines where /workspaces/Dicom-to-3D- does not exist.
_codespace_repo = '/workspaces/Dicom-to-3D-'
_base_dir = _codespace_repo if os.path.isdir(_codespace_repo) else os.getcwd()
output_root = os.environ.get('TCIA_DOWNLOAD_DIR') or os.path.join(
    _base_dir, 'data', 'tcia_downloads'
)

# One sub-directory per series; makedirs also creates output_root if needed.
target_dir = os.path.join(output_root, series_uid)
os.makedirs(target_dir, exist_ok=True)

# NOTE(review): confirm the download_series keyword names (SeriesInstanceUID,
# path, includePHI) against the installed tcia_utils version.
nbia.download_series(SeriesInstanceUID=series_uid, path=target_dir, includePHI=False)
print('Download complete ->', target_dir)

## 5) Summarize downloaded files

In [None]:
# Walk the download directory once and collect every DICOM file path.
# Sorting makes the count/sample output reproducible, because os.walk yields
# entries in whatever order the file system returns them.
dicom_files = sorted(
    os.path.join(root, name)
    for root, _, names in os.walk(target_dir)
    for name in names
    if name.lower().endswith('.dcm')
)

file_count = len(dicom_files)
print(f'DICOM files downloaded: {file_count}')
if file_count == 0:
    # An empty result usually means the download step did not write anything here.
    print('No .dcm files found under', target_dir, '- check the download step.')

# Show a few filenames (the full list stays available as dicom_files).
sample_files = dicom_files[:5]
print('Sample files:')
pprint.pp(sample_files)