In [None]:
# download_media.py （ノートブックでもOK、セルはこれ1つで完結）

import time
from pathlib import Path
from urllib.parse import urlsplit

import requests

# --- Shared headers ---
# Base header set copied into every request; the User-Agent string is that
# of desktop Safari on macOS.
BASE_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 "
        "(KHTML, like Gecko) Version/14.0.1 Safari/605.1.15"
    ),
    "Accept": "*/*",
    "Accept-Language": "ja,en-US;q=0.9,en;q=0.8",
}

# --- ドメイン別 Referer（403対策：Pixabay/Flickr）---
REFERERS = {
    "pixabay.com": "https://pixabay.com/",
    "flickr.com": "https://www.flickr.com/",
    "staticflickr.com": "https://www.flickr.com/",
}

def _pick_referer(url: str) -> str | None:
    for key, ref in REFERERS.items():
        if key in url:
            return ref
    return None

def download(url: str, filename: str, timeout: float = 20.0, chunk: int = 1024 * 256, retries: int = 3) -> bool:
    """Download *url* to *filename* with an automatic Referer and retries.

    The response body is streamed into a sibling ``<filename>.part`` file and
    renamed onto *filename* only after the whole body has been written, so a
    failed or interrupted transfer never leaves a truncated file under the
    final name.

    Args:
        url: Resource to fetch.
        filename: Destination path; missing parent directories are created.
        timeout: Timeout in seconds passed to ``requests.get``.
        chunk: Chunk size in bytes used when streaming the body to disk.
        retries: Maximum number of attempts.

    Returns:
        True when the file was written completely, False when every attempt
        failed (the last error is printed).

    Notes:
        HTTP 403/429/500/502/503/504 and any other ``requests`` error are
        retried with a linearly growing delay; other HTTP status errors abort
        immediately. Filesystem errors (``OSError``) are not caught.
    """
    dest = Path(filename)
    dest.parent.mkdir(parents=True, exist_ok=True)
    part = dest.with_name(dest.name + ".part")

    # Headers do not change between attempts, so build them once.
    headers = dict(BASE_HEADERS)
    ref = _pick_referer(url)
    if ref:
        headers["Referer"] = ref

    last_err = None
    for attempt in range(1, retries + 1):
        try:
            with requests.get(url, headers=headers, timeout=timeout, stream=True) as r:
                r.raise_for_status()
                with open(part, "wb") as f:
                    for c in r.iter_content(chunk):
                        if c:
                            f.write(c)
            # Only a fully written body is promoted to the final name.
            part.replace(dest)
            return True
        except requests.HTTPError as e:
            last_err = e
            code = e.response.status_code if e.response is not None else None
            if code not in (403, 429, 500, 502, 503, 504):
                break  # not a status worth retrying
            delay = 1.5 * attempt
        except requests.RequestException as e:
            last_err = e
            delay = 1.0 * attempt
        finally:
            # Drop any partial data; a no-op after a successful rename.
            part.unlink(missing_ok=True)

        # Back off only when another attempt will actually follow.
        if attempt < retries:
            time.sleep(delay)

    print(f"[FAILED] {url} -> {filename} ({last_err})")
    return False


# ---- Download targets ----
# Each entry is a (source URL, destination path) pair consumed by the
# download loop below.
files = [
    # Wikipedia / Commons (no Referer needed)
    ("https://upload.wikimedia.org/wikipedia/commons/0/03/Woy_Woy_Channel_-_Vignetted.jpg", "downloads/lake.jpg"),
    ("https://upload.wikimedia.org/wikipedia/commons/b/bb/Worldflags19.png", "downloads/flag.png"),
    ("https://upload.wikimedia.org/wikipedia/commons/3/32/Dead_tree_salt_and_pepper.png", "downloads/salt_and_pepper.png"),
    ("https://upload.wikimedia.org/wikipedia/commons/thumb/3/3e/Text.JPG/640px-Text.JPG", "downloads/text.jpg"),
    ("https://upload.wikimedia.org/wikipedia/en/thumb/5/5d/Shirt58_handwriting.png/800px-Shirt58_handwriting.png", "downloads/writing.png"),
    ("https://upload.wikimedia.org/wikipedia/commons/thumb/6/69/Human_evolution.svg/600px-Human_evolution.svg.png", "downloads/evol.png"),
    ("https://upload.wikimedia.org/wikipedia/commons/thumb/b/bd/Early_Imperial_Australian_Coins.jpg/320px-Early_Imperial_Australian_Coins.jpg", "downloads/coins.jpg"),
    ("https://upload.wikimedia.org/wikipedia/commons/thumb/2/29/Taxi_Noord_business_card%2C_Oude_Pekela_%282020%29_02.jpg/640px-Taxi_Noord_business_card%2C_Oude_Pekela_%282020%29_02.jpg", "downloads/card.jpg"),
    ("https://upload.wikimedia.org/wikipedia/commons/thumb/5/53/Colosseum_in_Rome%2C_Italy_-_April_2007.jpg/640px-Colosseum_in_Rome%2C_Italy_-_April_2007.jpg", "downloads/Colosseum.jpg"),
    ("https://upload.wikimedia.org/wikipedia/commons/5/53/Colosseum_in_Rome%2C_Italy_-_April_2007.jpg", "downloads/Colosseum_original.jpg"),
    ("https://upload.wikimedia.org/wikipedia/commons/thumb/a/af/Notre-Dame_de_Paris_2013-07-24.jpg/355px-Notre-Dame_de_Paris_2013-07-24.jpg", "downloads/Notre-Dame-1.jpg"),
    # NOTE(review): unlike the other thumb URLs, the thumbnail name here omits
    # the "2014_--_" part of the original file name -- verify this URL resolves.
    ("https://upload.wikimedia.org/wikipedia/commons/thumb/1/11/Paris%2C_Notre_Dame_--_2014_--_1445.jpg/301px-Paris%2C_Notre_Dame_--_1445.jpg", "downloads/Notre-Dame-2.jpg"),
    ("https://upload.wikimedia.org/wikipedia/commons/thumb/4/41/Moon_right-view_%28Clementine_dataset%29.png/800px-Moon_right-view_%28Clementine_dataset%29.png", "downloads/moon.png"),

    # Pixabay (a Referer is attached to avoid 403 responses)
    ("https://cdn.pixabay.com/photo/2015/09/02/13/24/girl-919048_1280.jpg", "downloads/girl.jpg"),
    ("https://cdn.pixabay.com/photo/2017/10/26/15/05/honeycomb-2891372_1280.jpg", "downloads/honeycomb.jpg"),

    # Flickr (staticflickr URLs also get Referer: flickr.com)
    ("https://live.staticflickr.com/6226/6375883291_972be61f52_w_d.jpg", "downloads/IR_cat.jpg"),

    # Openclipart
    ("https://openclipart.org/image/400px/svg_to_png/247372/WomanWalking.png", "downloads/sil.png"),

    # OpenCV samples
    ("https://raw.githubusercontent.com/opencv/opencv/master/samples/data/aloeL.jpg", "downloads/aloeL.jpg"),
    ("https://raw.githubusercontent.com/opencv/opencv/master/samples/data/aloeR.jpg", "downloads/aloeR.jpg"),

    # WAV (binary content is fine too)
    ("https://upload.wikimedia.org/wikipedia/commons/9/93/Start.wav", "downloads/start.wav"),
]

# ---- Run ----
# Fetch every entry in order, announcing each one and tallying outcomes.
ok = 0
ng = 0
for url, name in files:
    print("↓", url, "->", name)
    if not download(url, name):
        ng += 1
        continue
    ok += 1
print(f"done. success={ok}, failed={ng}")
