### Sample frames to label

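* Randomly samples up to 150 frames per bird (all of them if a bird has fewer usable frames)
* Copies the sampled cropped images to the labelling folder as `<bird>_<index>.png`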

In [3]:
from pathlib import Path
from random import seed, sample
from tqdm.auto import tqdm
import shutil

# Root folder that is expected to contain one sub-folder per bird ID (see bird_list).
SOURCE_ROOT = Path("/lisc/data/scratch/becogbio/juarez/thesis/2.1_det_mask_frames_2021/")
# Flat output folder that receives the sampled frames; created here if it does not exist yet.
DEST_ROOT = Path("/lisc/data/scratch/becogbio/juarez/thesis/3_Viewpoint_classifier/frames_for_labelling")
DEST_ROOT.mkdir(parents=True, exist_ok=True)

# Upper bound on the number of frames drawn per bird.
SAMPLES_PER_BIRD = 150
# Seed for the global `random` generator so that a re-run draws the same frames
# (given the same files and the same bird order).
SEED = 42
# Bird IDs to process; each ID is used as a sub-folder name under SOURCE_ROOT and as the
# prefix of the copied file names.
bird_list = [
    "BRG-YOM", "EYB-RPM", "BNU-RPM", "GBM-ORY", "OGY-BRM", "GBY-ORM", "RYO-BGM", "OYR-BGM", "OEB-RPM",
    "ORB-UYM", "YM-OBR", "BNY-RPM", "OUB-RPM", "YRU-POM", "RGY-BOM", "BRK-NOM", "YGO-M", "BEU-RPM",
    "ORG-BYM", "PUE-ONM"]

# Seed once, before any sampling: the draws for all birds come from this single random stream,
# so changing the order or content of bird_list changes which frames later birds get.
seed(SEED)

def collect_pngs(folder: Path):
    """Return a sorted list of usable PNG files found anywhere below *folder*.

    A path is kept only if it matches ``*.png``, is a regular file, is not an
    AppleDouble ``._`` sidecar file, and is larger than 4096 bytes.
    """
    kept = []
    for candidate in folder.rglob("*.png"):
        if not candidate.is_file():
            continue
        # AppleDouble sidecar files carry the ".png" suffix but are not images.
        if candidate.name.startswith("._"):
            continue
        # Very small files are skipped; exactly 4096 bytes is still too small.
        if candidate.stat().st_size <= 4096:
            continue
        kept.append(candidate)
    kept.sort()
    return kept

# Split the requested birds into those that have a source folder and those that do not.
# Done in one pass so each directory is checked exactly once.
existing_birds, missing_birds = [], []
for b in bird_list:
    (existing_birds if (SOURCE_ROOT / b).is_dir() else missing_birds).append(b)
if missing_birds:
    print("⚠ Missing folders (skipped):", ", ".join(missing_birds))

for bird in tqdm(existing_birds, desc="birds", unit="bird"):
    sub = SOURCE_ROOT / bird
    png_files = collect_pngs(sub)

    # Messages inside the loop go through tqdm.write so they are printed without
    # being interleaved with the active progress bars.
    if not png_files:
        tqdm.write(f"⚠ No PNGs in {bird}")
        continue

    # Draw without replacement; if the bird has fewer usable frames than requested, take all.
    k = min(SAMPLES_PER_BIRD, len(png_files))
    chosen = sample(png_files, k)

    # Copies are renamed to "<bird>_<running index>" (1-based, zero-padded to 4 digits);
    # an existing file with the same name in DEST_ROOT is overwritten.
    for idx, img_path in enumerate(tqdm(chosen, desc=bird, unit="img", leave=False), start=1):
        name = f"{bird}_{idx:04d}{img_path.suffix.lower()}"
        shutil.copy2(img_path, DEST_ROOT / name)

    if len(png_files) < SAMPLES_PER_BIRD:
        tqdm.write(f"{bird}: only {len(png_files)} images available; copied all.")
    else:
        tqdm.write(f"{bird}: {SAMPLES_PER_BIRD} images copied.")

print("Done!")

birds:   0%|          | 0/20 [00:00<?, ?bird/s]
BRG-YOM:   0%|          | 0/150 [00:00<?, ?img/s][A
BRG-YOM:  21%|██        | 31/150 [00:00<00:00, 306.49img/s][A
BRG-YOM:  42%|████▏     | 63/150 [00:00<00:00, 304.48img/s][A
BRG-YOM:  63%|██████▎   | 94/150 [00:00<00:00, 301.29img/s][A
BRG-YOM:  83%|████████▎ | 125/150 [00:00<00:00, 297.98img/s][A
birds:   5%|▌         | 1/20 [00:09<03:07,  9.88s/bird]     [A

BRG-YOM: 150 images copied.



EYB-RPM:   0%|          | 0/150 [00:00<?, ?img/s][A
EYB-RPM:  25%|██▍       | 37/150 [00:00<00:00, 359.98img/s][A
EYB-RPM:  49%|████▊     | 73/150 [00:00<00:00, 353.85img/s][A
EYB-RPM:  73%|███████▎  | 109/150 [00:00<00:00, 335.46img/s][A
EYB-RPM:  95%|█████████▌| 143/150 [00:00<00:00, 327.71img/s][A
birds:  10%|█         | 2/20 [00:16<02:26,  8.13s/bird]     [A

EYB-RPM: 150 images copied.



BNU-RPM:   0%|          | 0/150 [00:00<?, ?img/s][A
BNU-RPM:  17%|█▋        | 26/150 [00:00<00:00, 251.92img/s][A
BNU-RPM:  37%|███▋      | 55/150 [00:00<00:00, 270.99img/s][A
BNU-RPM:  55%|█████▌    | 83/150 [00:00<00:00, 274.47img/s][A
BNU-RPM:  75%|███████▌  | 113/150 [00:00<00:00, 282.10img/s][A
BNU-RPM:  95%|█████████▍| 142/150 [00:00<00:00, 278.00img/s][A
birds:  15%|█▌        | 3/20 [00:25<02:20,  8.29s/bird]     [A

BNU-RPM: 150 images copied.



GBM-ORY:   0%|          | 0/150 [00:00<?, ?img/s][A
GBM-ORY:  22%|██▏       | 33/150 [00:00<00:00, 328.90img/s][A
GBM-ORY:  45%|████▍     | 67/150 [00:00<00:00, 329.85img/s][A
GBM-ORY:  68%|██████▊   | 102/150 [00:00<00:00, 336.05img/s][A
GBM-ORY:  91%|█████████ | 136/150 [00:00<00:00, 318.82img/s][A
birds:  20%|██        | 4/20 [00:30<01:50,  6.91s/bird]     [A

GBM-ORY: 150 images copied.



OGY-BRM:   0%|          | 0/150 [00:00<?, ?img/s][A
OGY-BRM:  24%|██▍       | 36/150 [00:00<00:00, 356.91img/s][A
OGY-BRM:  48%|████▊     | 72/150 [00:00<00:00, 340.96img/s][A
OGY-BRM:  71%|███████▏  | 107/150 [00:00<00:00, 344.44img/s][A
OGY-BRM:  95%|█████████▍| 142/150 [00:00<00:00, 333.76img/s][A
birds:  25%|██▌       | 5/20 [00:33<01:25,  5.69s/bird]     [A

OGY-BRM: 150 images copied.



GBY-ORM:   0%|          | 0/150 [00:00<?, ?img/s][A
GBY-ORM:  26%|██▌       | 39/150 [00:00<00:00, 383.98img/s][A
GBY-ORM:  53%|█████▎    | 79/150 [00:00<00:00, 386.99img/s][A
GBY-ORM:  79%|███████▊  | 118/150 [00:00<00:00, 367.17img/s][A
birds:  30%|███       | 6/20 [00:35<01:03,  4.57s/bird]     [A

GBY-ORM: 150 images copied.



RYO-BGM:   0%|          | 0/150 [00:00<?, ?img/s][A
RYO-BGM:  25%|██▍       | 37/150 [00:00<00:00, 356.77img/s][A
RYO-BGM:  49%|████▊     | 73/150 [00:00<00:00, 352.12img/s][A
RYO-BGM:  74%|███████▍  | 111/150 [00:00<00:00, 360.04img/s][A
RYO-BGM:  99%|█████████▊| 148/150 [00:00<00:00, 317.01img/s][A
birds:  35%|███▌      | 7/20 [00:38<00:50,  3.89s/bird]     [A

RYO-BGM: 150 images copied.



OYR-BGM:   0%|          | 0/150 [00:00<?, ?img/s][A
OYR-BGM:  21%|██        | 31/150 [00:00<00:00, 299.43img/s][A
OYR-BGM:  41%|████      | 61/150 [00:00<00:00, 259.34img/s][A
OYR-BGM:  59%|█████▊    | 88/150 [00:00<00:00, 246.31img/s][A
OYR-BGM:  76%|███████▌  | 114/150 [00:00<00:00, 247.51img/s][A
OYR-BGM:  93%|█████████▎| 139/150 [00:00<00:00, 241.55img/s][A
birds:  40%|████      | 8/20 [00:40<00:40,  3.42s/bird]     [A

OYR-BGM: 150 images copied.



OEB-RPM:   0%|          | 0/150 [00:00<?, ?img/s][A
OEB-RPM:  21%|██        | 31/150 [00:00<00:00, 309.15img/s][A
OEB-RPM:  42%|████▏     | 63/150 [00:00<00:00, 314.68img/s][A
OEB-RPM:  63%|██████▎   | 95/150 [00:00<00:00, 313.15img/s][A
OEB-RPM:  85%|████████▌ | 128/150 [00:00<00:00, 316.50img/s][A
birds:  45%|████▌     | 9/20 [00:44<00:37,  3.43s/bird]     [A

OEB-RPM: 150 images copied.



ORB-UYM:   0%|          | 0/150 [00:00<?, ?img/s][A
ORB-UYM:  23%|██▎       | 34/150 [00:00<00:00, 334.18img/s][A
ORB-UYM:  47%|████▋     | 70/150 [00:00<00:00, 343.57img/s][A
ORB-UYM:  70%|███████   | 105/150 [00:00<00:00, 341.37img/s][A
ORB-UYM:  93%|█████████▎| 140/150 [00:00<00:00, 286.63img/s][A
birds:  50%|█████     | 10/20 [00:46<00:29,  2.91s/bird]    [A

ORB-UYM: 150 images copied.



YM-OBR:   0%|          | 0/150 [00:00<?, ?img/s][A
YM-OBR:  21%|██▏       | 32/150 [00:00<00:00, 317.06img/s][A
YM-OBR:  46%|████▌     | 69/150 [00:00<00:00, 346.48img/s][A
YM-OBR:  71%|███████   | 106/150 [00:00<00:00, 354.64img/s][A
YM-OBR:  95%|█████████▍| 142/150 [00:00<00:00, 307.77img/s][A
birds:  55%|█████▌    | 11/20 [00:47<00:23,  2.56s/bird]   [A

YM-OBR: 150 images copied.



BNY-RPM:   0%|          | 0/150 [00:00<?, ?img/s][A
BNY-RPM:  21%|██        | 31/150 [00:00<00:00, 297.40img/s][A
BNY-RPM:  41%|████      | 61/150 [00:00<00:00, 264.34img/s][A
BNY-RPM:  59%|█████▊    | 88/150 [00:00<00:00, 262.07img/s][A
BNY-RPM:  77%|███████▋  | 115/150 [00:00<00:00, 257.50img/s][A
BNY-RPM:  94%|█████████▍| 141/150 [00:00<00:00, 255.34img/s][A
birds:  60%|██████    | 12/20 [00:48<00:16,  2.00s/bird]    [A

BNY-RPM: 150 images copied.



OUB-RPM:   0%|          | 0/150 [00:00<?, ?img/s][A
OUB-RPM:  25%|██▌       | 38/150 [00:00<00:00, 369.19img/s][A
OUB-RPM:  50%|█████     | 75/150 [00:00<00:00, 366.78img/s][A
OUB-RPM:  75%|███████▍  | 112/150 [00:00<00:00, 311.80img/s][A
OUB-RPM:  96%|█████████▌| 144/150 [00:00<00:00, 276.44img/s][A
birds:  65%|██████▌   | 13/20 [00:49<00:12,  1.79s/bird]    [A

OUB-RPM: 150 images copied.



YRU-POM:   0%|          | 0/150 [00:00<?, ?img/s][A
YRU-POM:  25%|██▍       | 37/150 [00:00<00:00, 360.13img/s][A
YRU-POM:  49%|████▉     | 74/150 [00:00<00:00, 295.48img/s][A
YRU-POM:  70%|███████   | 105/150 [00:00<00:00, 272.12img/s][A
YRU-POM:  89%|████████▊ | 133/150 [00:00<00:00, 259.52img/s][A
birds:  70%|███████   | 14/20 [00:51<00:09,  1.61s/bird]    [A

YRU-POM: 150 images copied.



RGY-BOM:   0%|          | 0/150 [00:00<?, ?img/s][A
RGY-BOM:  20%|██        | 30/150 [00:00<00:00, 288.02img/s][A
RGY-BOM:  39%|███▉      | 59/150 [00:00<00:00, 279.39img/s][A
RGY-BOM:  58%|█████▊    | 87/150 [00:00<00:00, 265.90img/s][A
RGY-BOM:  76%|███████▌  | 114/150 [00:00<00:00, 261.46img/s][A
RGY-BOM:  94%|█████████▍| 141/150 [00:00<00:00, 261.22img/s][A
birds:  75%|███████▌  | 15/20 [00:51<00:06,  1.35s/bird]    [A

RGY-BOM: 150 images copied.



BRK-NOM:   0%|          | 0/150 [00:00<?, ?img/s][A
BRK-NOM:  24%|██▍       | 36/150 [00:00<00:00, 356.14img/s][A
BRK-NOM:  48%|████▊     | 72/150 [00:00<00:00, 325.32img/s][A
BRK-NOM:  70%|███████   | 105/150 [00:00<00:00, 288.90img/s][A
BRK-NOM:  90%|█████████ | 135/150 [00:00<00:00, 257.12img/s][A
birds:  80%|████████  | 16/20 [00:53<00:05,  1.31s/bird]    [A

BRK-NOM: 150 images copied.



YGO-M:   0%|          | 0/150 [00:00<?, ?img/s][A
YGO-M:  21%|██        | 31/150 [00:00<00:00, 308.41img/s][A
YGO-M:  41%|████▏     | 62/150 [00:00<00:00, 250.93img/s][A
YGO-M:  59%|█████▊    | 88/150 [00:00<00:00, 233.82img/s][A
YGO-M:  76%|███████▌  | 114/150 [00:00<00:00, 239.12img/s][A
YGO-M:  93%|█████████▎| 139/150 [00:00<00:00, 230.83img/s][A
birds:  85%|████████▌ | 17/20 [00:53<00:03,  1.18s/bird]  [A

YGO-M: 150 images copied.



BEU-RPM:   0%|          | 0/150 [00:00<?, ?img/s][A
BEU-RPM:  19%|█▊        | 28/150 [00:00<00:00, 278.72img/s][A
BEU-RPM:  38%|███▊      | 57/150 [00:00<00:00, 284.89img/s][A
BEU-RPM:  57%|█████▋    | 86/150 [00:00<00:00, 230.74img/s][A
BEU-RPM:  74%|███████▍  | 111/150 [00:00<00:00, 234.52img/s][A
BEU-RPM:  92%|█████████▏| 138/150 [00:00<00:00, 245.20img/s][A
birds:  90%|█████████ | 18/20 [00:54<00:02,  1.10s/bird]    [A

BEU-RPM: 150 images copied.



ORG-BYM:   0%|          | 0/150 [00:00<?, ?img/s][A
ORG-BYM:  17%|█▋        | 26/150 [00:00<00:00, 245.68img/s][A
ORG-BYM:  34%|███▍      | 51/150 [00:00<00:00, 236.58img/s][A
ORG-BYM:  50%|█████     | 75/150 [00:00<00:00, 217.29img/s][A
ORG-BYM:  65%|██████▌   | 98/150 [00:00<00:00, 219.57img/s][A
ORG-BYM:  81%|████████  | 121/150 [00:00<00:00, 136.38img/s][A
ORG-BYM:  92%|█████████▏| 138/150 [00:01<00:00, 95.29img/s] [A
birds:  95%|█████████▌| 19/20 [00:56<00:01,  1.12s/bird]   [A

ORG-BYM: 150 images copied.



PUE-ONM:   0%|          | 0/119 [00:00<?, ?img/s][A
PUE-ONM:  21%|██        | 25/119 [00:00<00:00, 246.28img/s][A
PUE-ONM:  42%|████▏     | 50/119 [00:00<00:00, 245.39img/s][A
PUE-ONM:  65%|██████▍   | 77/119 [00:00<00:00, 251.95img/s][A
PUE-ONM:  87%|████████▋ | 103/119 [00:00<00:00, 254.67img/s][A
birds: 100%|██████████| 20/20 [00:56<00:00,  2.82s/bird]    [A

PUE-ONM: only 119 images available; copied all.
Done!



