In [1]:
import os
import requests

In [2]:
# The dataset DOI
DOI = "10.7910/DVN/3AIJKU"

# Fetch dataset metadata by persistent identifier.
meta_url = "https://dataverse.harvard.edu/api/datasets/:persistentId"
params = {"persistentId": f"doi:{DOI}"}
# requests has no default timeout; bound the call as (connect, read) seconds so a
# stalled connection raises instead of hanging the kernel indefinitely.
resp = requests.get(meta_url, params=params, timeout=(10, 60))
resp.raise_for_status()
dataset = resp.json()["data"]

# File entries of the latest dataset version; the download cells below read
# each entry's "label" and its "dataFile" -> "id".
files = dataset["latestVersion"]["files"]

## Download Pretrained SimCLR Checkpoints

In [31]:
# Filter only the SimCLR checkpoint files (labels of the form model_*.ckpt)
model_files = [
    f for f in files
    if f["label"].startswith("model_") and f["label"].endswith(".ckpt")
]

# Create output directory
out_dir = "simclr_models"
os.makedirs(out_dir, exist_ok=True)

for f in model_files:
    file_id  = f["dataFile"]["id"]
    filename = f["label"]
    download_url = f"https://dataverse.harvard.edu/api/access/datafile/{file_id}"
    dest_path = os.path.join(out_dir, filename)
    # Stream into a temporary sibling file so an interrupted download never
    # leaves a truncated checkpoint under the final name.
    part_path = dest_path + ".part"

    print(f"Downloading {filename}…", end=" ", flush=True)
    # The context manager releases the connection even if raise_for_status()
    # or a write fails; the (connect, read) timeout keeps a stalled transfer
    # from hanging the kernel.
    with requests.get(download_url, stream=True, timeout=(10, 60)) as dl:
        dl.raise_for_status()
        with open(part_path, "wb") as fp:
            for chunk in dl.iter_content(chunk_size=8192):
                fp.write(chunk)
    # Move into place only once the whole body has been written.
    os.replace(part_path, dest_path)
    print("Done")

Downloading model_0.ckpt… Done
Downloading model_1.ckpt… Done
Downloading model_2.ckpt… Done
Downloading model_3.ckpt… Done
Downloading model_4.ckpt… Done
Downloading model_5.ckpt… Done
Downloading model_6.ckpt… Done
Downloading model_7.ckpt… Done


## Download Pretrained MLP Models

In [3]:
# Filter only the Dino+MLP model files (labels of the form model_*.pt)
mlp_files = [
    f for f in files
    if f["label"].startswith("model_") and f["label"].endswith(".pt")
]

# Create output directory
out_dir = "dino_mlp_models"
os.makedirs(out_dir, exist_ok=True)

for f in mlp_files:
    file_id  = f["dataFile"]["id"]
    filename = f["label"]
    download_url = f"https://dataverse.harvard.edu/api/access/datafile/{file_id}"
    dest_path = os.path.join(out_dir, filename)
    # Write to a ".part" file first; a failed transfer must not masquerade as
    # a complete model file on the next run.
    part_path = dest_path + ".part"

    print(f"Downloading {filename}…", end=" ", flush=True)
    # Close the streamed response deterministically and bound the request
    # with a (connect, read) timeout so it cannot block forever.
    with requests.get(download_url, stream=True, timeout=(10, 60)) as dl:
        dl.raise_for_status()
        with open(part_path, "wb") as fp:
            for chunk in dl.iter_content(chunk_size=8192):
                fp.write(chunk)
    # Rename to the final name only after the full body is on disk.
    os.replace(part_path, dest_path)
    print("Done")

Downloading model_0.pt… Done
Downloading model_1.pt… Done
Downloading model_2.pt… Done
Downloading model_3.pt… Done
Downloading model_4.pt… Done


## Download Labeled Data

In [4]:
# Filter only the .npy data files
npy_files = [
    f for f in files
    if f["label"].endswith(".npy")
]

# Create output directory
out_dir = "labeled_data"
os.makedirs(out_dir, exist_ok=True)

for f in npy_files:
    file_id  = f["dataFile"]["id"]
    filename = f["label"]
    download_url = f"https://dataverse.harvard.edu/api/access/datafile/{file_id}"
    dest_path = os.path.join(out_dir, filename)
    # Download under a temporary name so a partial array file is never left
    # at the path later code will load from.
    part_path = dest_path + ".part"

    print(f"Downloading {filename}…", end=" ", flush=True)
    # "with" guarantees the connection is released on any error; the
    # (connect, read) timeout turns a stalled transfer into an exception.
    with requests.get(download_url, stream=True, timeout=(10, 60)) as dl:
        dl.raise_for_status()
        with open(part_path, "wb") as fp:
            for chunk in dl.iter_content(chunk_size=8192):
                fp.write(chunk)
    # Publish the file under its real name once it is complete.
    os.replace(part_path, dest_path)
    print("Done")

Downloading zdisc_contours_images.npy… Done
Downloading zdisc_contours_labels.npy… Done
