In [None]:
from google.colab import auth
auth.authenticate_user() # Log in with a GCP account (presumably so the gsutil commands below can reach the bucket)

import os
import re

# Base directory on Google Drive and the GCS location it is synced to.
# NOTE(review): assumes Drive is already mounted at /content/drive — no mount call is visible here.
BASE_DIR = "/content/drive/MyDrive/R-NaD-PokemonTCGP_experiments"
DEST_BASE = "gs://r-nad-pokemontcgp-checkpoints/R-NaD-PokemonTCGP_experiments"

# 1. mlflow のデータは全て同期 (mlruns フォルダを同期)
print("Syncing all mlflow data...")
if os.path.exists(f"{BASE_DIR}/mlruns"):
    !gsutil -m rsync -r {BASE_DIR}/mlruns {DEST_BASE}/mlruns

# 2. チェックポイントは各ランごとに最古と最新のみ同期
print("Syncing oldest and newest checkpoints...")
CKPT_DIR = f"{BASE_DIR}/checkpoints"
if os.path.exists(CKPT_DIR):
    for run_id in os.listdir(CKPT_DIR):
        run_path = os.path.join(CKPT_DIR, run_id)
        if not os.path.isdir(run_path):
            continue
            
        files = os.listdir(run_path)
        ckpt_files = []
        for f in files:
            m = re.search(r"(\d+)", f)
            if m:
                ckpt_files.append((int(m.group(1)), f))
        
        if ckpt_files:
            ckpt_files.sort()
            targets = [ckpt_files[0][1], ckpt_files[-1][1]]
            targets = list(set(targets))
            
            print(f"  Run {run_id}: {targets}")
            for t in targets:
                src = f"{run_path}/{t}"
                dst = f"{DEST_BASE}/checkpoints/{run_id}/{t}"
                if os.path.isdir(src):
                    !gsutil -m cp -r {src} {dst}
                else:
                    !gsutil cp {src} {dst}
else:
    print("Checkpoint directory not found.")
