In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; the
# BASE_DIR path used further down lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

# Interactive Google sign-in for this Colab session.
# NOTE(review): presumably this is what authorizes the gsutil commands
# below to write to the GCS bucket - confirm.
from google.colab import auth
auth.authenticate_user() # Log in with a GCP account

import os
import re

# Drive のベースディレクトリと GCS の同期先
BASE_DIR = "/content/drive/MyDrive/R-NaD-PokemonTCGP_experiments"
DEST_BASE = "gs://r-nad-pokemontcgp-checkpoints/R-NaD-PokemonTCGP_experiments"

# 1. mlflow のデータは全て同期 (mlruns フォルダを同期)
print("Syncing all mlflow data...")
if os.path.exists(f"{BASE_DIR}/mlruns"):
    print("MlFlow directory found.")
    !gsutil -m rsync -r {BASE_DIR}/mlruns {DEST_BASE}/mlruns

# 2. For checkpoints, sync only the oldest and the newest one of each run.
print("Syncing oldest and newest checkpoints...")
# Local directory (under BASE_DIR) that is scanned for checkpoint files.
CKPT_DIR = f"{BASE_DIR}/checkpoints"

def sync_ckpts_in_dir(path, is_root=False, run_id=None):
    if not os.path.exists(path):
        return
    files = os.listdir(path)
    ckpt_files = []
    for f in files:
        if f.endswith(".pkl"):
            m = re.search(r"(\d+)", f)
            if m:
                ckpt_files.append((int(m.group(1)), f))
    
    if ckpt_files:
        ckpt_files.sort()
        targets = [ckpt_files[0][1], ckpt_files[-1][1]]
        targets = list(set(targets))
        
        dest_label = "root" if is_root else run_id
        print(f"  Syncing {dest_label}: {targets}")
        
        for t in targets:
            src = f"{path}/{t}"
            if is_root:
                # ルート直下のものは checkpoints/ 直下に置く
                dst = f"{DEST_BASE}/checkpoints/{t}"
            else:
                # ランIDがあるものは checkpoints/run_id/ 直下に置く
                dst = f"{DEST_BASE}/checkpoints/{run_id}/{t}"
            !gsutil cp {src} {dst}

# Drive the checkpoint sync: first the checkpoint root itself, then every
# run sub-directory found inside it.
if not os.path.exists(CKPT_DIR):
    print("Checkpoint directory not found.")
else:
    # A. .pkl files that sit directly in the checkpoint root.
    sync_ckpts_in_dir(CKPT_DIR, is_root=True)

    # B. .pkl files inside each run-ID directory.
    for run_id in os.listdir(CKPT_DIR):
        run_path = os.path.join(CKPT_DIR, run_id)
        if not os.path.isdir(run_path):
            # Plain files in the root were already handled in step A.
            continue
        sync_ckpts_in_dir(run_path, run_id=run_id)
