<a href="https://colab.research.google.com/github/kotaro-desu/google-colab/blob/main/Untitled40.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install optuna timm

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting timm
  Downloading timm-1.0.22-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.17.2-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.5 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (4.1 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
#!/usr/bin/env python3
"""
EfficientNetV2-S Multi-Output Binary Classification with Optuna (TPU Version)
Modified for Google Cloud TPU (v6e/v5p etc.) using torch_xla with Mixed Precision (BF16)
Global Data Caching Added to prevent OOM.
"""

import os
# --- IMPORTANT: enable mixed precision (BF16) on TPU ---
# The variable is set right after `import os`, ahead of the torch / torch_xla imports below.
# NOTE(review): recent torch_xla releases reportedly deprecate XLA_USE_BF16 — confirm against the installed version.
os.environ['XLA_USE_BF16'] = '1'

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import timm
import numpy as np
import pickle
from pathlib import Path
from tqdm import tqdm
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
import argparse
import json
from PIL import Image
import io
import sys
import time
import gc
import shutil
import random
import sqlite3
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

# --- TPU IMPORTS ---
try:
    import torch_xla.core.xla_model as xm
    import torch_xla.distributed.parallel_loader as pl
    import torch_xla.utils.utils as xu
except ImportError:
    print("[Warning] torch_xla not found. Please ensure you are running on a TPU runtime.")

# --- GLOBAL CONFIG (V5) ---
LOCAL_CACHE_DIR = Path('/content/temp_dataset_cache')  # local directory holding the pickled per-split caches
RESOLUTION = 300  # fixed image resolution (px); part of the dataset directory and cache file names
MAX_EPOCHS = 100  # upper bound on training epochs per trial
PATIENCE = 5  # epochs without a validation-F1 improvement before early stopping
EPOCH_MONITOR_START = 15  # default: completed trials required before EpochMonitorCallback starts checking
EPOCH_MONITOR_THRESHOLD = 0.5  # default: fraction of max-epoch trials above which the callback warns

# --- GLOBAL DATA CACHE (guards against out-of-memory crashes) ---
# The actual data lives here exactly once; every trial references it.
_GLOBAL_DATA_CACHE = {}

def set_seed(seed=42):
    """Seed Python's ``random``, NumPy and PyTorch, and export PYTHONHASHSEED.

    Args:
        seed: Integer seed handed to every generator (default 42).
    """
    # Same seeding order as before: random -> numpy -> torch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    print(f"Global seed set to: {seed}")

# --- V5: AUGMENTATION (Dynamic Magnitude) ---
def get_transforms(augment=True, magnitude=10):
    """Build the torchvision preprocessing pipeline.

    Args:
        augment: When True, RandAugment (2 ops) and a random horizontal flip
            are applied before tensor conversion.
        magnitude: RandAugment magnitude; only used when ``augment`` is True.

    Returns:
        A ``transforms.Compose`` that always ends with ``ToTensor`` followed by
        per-channel normalisation.
    """
    # Steps shared by the training and evaluation pipelines.
    steps = [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    if augment:
        # Augmentations work on the PIL image, so they go in front.
        steps = [
            transforms.RandAugment(num_ops=2, magnitude=magnitude),
            transforms.RandomHorizontalFlip(),
        ] + steps
    return transforms.Compose(steps)

# --- HELPER: FREEZE LAYERS ---
def freeze_layers(model, freeze_ratio=0.0):
    """Freeze the leading part of ``model.base_model`` in proportion to ``freeze_ratio``.

    With ``freeze_ratio <= 0`` every parameter of ``model`` is made trainable.
    Otherwise the first ``int(num_conv * freeze_ratio)`` ``nn.Conv2d`` modules
    of the backbone (in registration order) are frozen together with every
    backbone parameter registered before the first conv that stays trainable,
    so the normalisation layers sitting between frozen convs are frozen too.
    Parameters outside ``model.base_model`` (e.g. the head) are not touched.

    Parameters are frozen by position rather than by top-level module name:
    matching on the first path component (e.g. ``'blocks'``) freezes a whole
    container as soon as a single conv inside it is selected, which makes
    almost every non-zero ratio freeze nearly the entire backbone.

    Args:
        model: Module exposing the backbone as ``model.base_model``.
        freeze_ratio: Fraction of backbone conv layers to freeze; values above
            1.0 behave like 1.0.

    Note:
        Assumes registration order follows the forward order of the backbone —
        TODO confirm for the architecture in use.
    """
    if freeze_ratio <= 0.0:
        for param in model.parameters():
            param.requires_grad = True
        return

    base = model.base_model
    conv_layers = [m for m in base.modules() if isinstance(m, nn.Conv2d)]
    num_freeze = min(int(len(conv_layers) * freeze_ratio), len(conv_layers))
    if num_freeze == 0:
        # Ratio too small to select even one conv layer: nothing to freeze.
        return

    if num_freeze < len(conv_layers):
        # Parameters of the first conv that must remain trainable mark the cut.
        boundary_ids = {id(p) for p in conv_layers[num_freeze].parameters()}
    else:
        # Every conv is selected: freeze the complete backbone.
        boundary_ids = set()

    for param in base.parameters():
        if id(param) in boundary_ids:
            break
        param.requires_grad = False

# --- INTEGRITY CHECK ---
def check_pickle_integrity(path):
    """Return True when ``path`` exists and can be unpickled without error.

    The whole file is deserialised to validate it, so the cost grows with the
    file size. Any exception raised while loading is reported and treated as
    corruption.

    Args:
        path: ``pathlib.Path`` of the pickle file to validate.

    Returns:
        bool: False when the file is missing or fails to load, True otherwise.
    """
    if path.exists():
        try:
            with open(path, 'rb') as handle:
                pickle.load(handle)
        except Exception as e:  # also covers EOFError and pickle.UnpicklingError
            print(f"    [Corrupt] File is broken: {path} ({e})")
            return False
        return True
    return False

# --- DATA LOADING FUNCTIONS ---
def get_file_list(base_dir, split, resolution):
    """List the ``.npz`` chunk files that belong to one dataset split.

    Args:
        base_dir: Dataset root; chunks are searched under
            ``npz_datasets_v4/npz_datasets_v4_universal_{resolution}px``.
        split: Split name. ``'train'`` and ``'val_balanced'`` are stored as
            separate ``*_rain_*`` and ``*_clear_*`` chunks; any other split
            matches ``{split}_*.npz``.
        resolution: Image resolution in pixels (part of the directory name).

    Returns:
        list[Path]: Matches per pattern in sorted order, rain chunks before
        clear chunks for the paired splits.
    """
    data_dir = Path(base_dir) / 'npz_datasets_v4' / f'npz_datasets_v4_universal_{resolution}px'
    if split in ('train', 'val_balanced'):
        patterns = [f'{split}_rain_*.npz', f'{split}_clear_*.npz']
    else:
        patterns = [f'{split}_*.npz']
    return [path for pattern in patterns for path in sorted(data_dir.glob(pattern))]

def create_subset_two_pass(resolution, org_name, base_dir, split, target_size='3x', seed=42):
    """Build an in-memory (images, labels) subset of one split in two passes.

    Pass 1 reads only the label dictionaries of every chunk to learn the
    per-file sample counts and to choose the global sample indices to keep.
    Pass 2 opens just the chunks containing selected samples and copies those
    rows into preallocated arrays.

    Selection rule:
      * ``'val_imbalanced'`` keeps every sample.
      * every other split is balanced 50/50 between "rain" samples (maximum
        'Universal' level > 0) and "clear" samples (maximum == 0); a class that
        is too small is oversampled with replacement.

    Target counts: '1x' -> 20000, '2x' -> 40000, '3x' -> 60000, anything else
    -> 20000; ``'val_balanced'`` is always 20000.

    Args:
        resolution: Image resolution in pixels (selects the chunk directory).
        org_name: Key inside each chunk's ``levels`` dict providing the labels.
        base_dir: Dataset root directory.
        split: Split name passed to ``get_file_list``.
        target_size: Size tag ('1x', '2x', '3x') controlling the target count.
        seed: Seed for NumPy's and Python's global RNGs (both are reseeded).

    Returns:
        tuple: ``(images, labels)`` as a uint8 image array and an int64 label
        array with matching first dimension, or ``(None, None)`` when no
        usable data was found. If some chunks cannot be read during pass 2,
        only the rows that were actually filled are returned.
    """
    np.random.seed(seed)
    random.seed(seed)

    files = get_file_list(base_dir, split, resolution)
    if not files:
        print(f"      [WARNING] No files found for {split} {resolution}px")
        return None, None

    # --- PASS 1: SCAN METADATA ---
    print(f"      [Pass 1] Scanning {split} labels...")
    file_map = []
    all_univ_levels = []
    all_org_labels = []
    key_error_reported = False

    for chunk_file in tqdm(files, desc="Scanning Labels", leave=False):
        try:
            # allow_pickle is needed because 'levels' stores a pickled dict;
            # only trusted, project-generated chunks should be loaded this way.
            with np.load(chunk_file, allow_pickle=True) as data:
                levels_dict = data['levels'].item()
                if org_name not in levels_dict:
                    if not key_error_reported:
                        print(f"      [SKIP] Key '{org_name}' missing")
                        key_error_reported = True
                    continue
                lbls = levels_dict[org_name]
                univ = levels_dict['Universal']
                file_map.append({'path': chunk_file, 'count': len(lbls)})
                all_org_labels.append(lbls)
                all_univ_levels.append(univ)
        except Exception as e:
            print(f"      [ERROR] reading {chunk_file}: {e}")
            continue

    if not all_org_labels:
        return None, None

    full_org_labels = np.concatenate(all_org_labels, axis=0)
    full_univ_levels = np.concatenate(all_univ_levels, axis=0)
    total_samples = len(full_org_labels)
    # Per-sample label shape taken from the data instead of a hard-coded width.
    label_shape = full_org_labels.shape[1:]

    base_count = 10000
    size_multiplier = {'1x': 2, '2x': 4, '3x': 6}
    target_count = base_count * size_multiplier.get(target_size, 2)
    if split == 'val_balanced':
        target_count = 20000

    if split == 'val_imbalanced':
        # The imbalanced validation split is used in full.
        selected_global_indices = np.arange(total_samples)
    else:
        target_per_class = target_count // 2
        max_univ = full_univ_levels.max(axis=1)
        rain_indices = np.where(max_univ > 0)[0]
        clear_indices = np.where(max_univ == 0)[0]

        def _draw(class_indices):
            # Subsample without replacement when enough samples exist,
            # otherwise keep all of them and top up with replacement.
            if len(class_indices) >= target_per_class:
                return np.random.choice(class_indices, target_per_class, replace=False)
            extra = np.random.choice(class_indices, target_per_class - len(class_indices), replace=True)
            return np.concatenate([class_indices, extra])

        sel_rain = _draw(rain_indices)
        sel_clear = _draw(clear_indices)
        selected_global_indices = np.concatenate([sel_rain, sel_clear])

    np.random.shuffle(selected_global_indices)
    final_count = len(selected_global_indices)
    print(f"      Selected {final_count} samples (Target: {target_size})")

    del full_univ_levels, all_univ_levels, all_org_labels, full_org_labels
    gc.collect()

    # --- PASS 2: EXTRACT IMAGES ---
    print(f"      [Pass 2] Extracting images...")
    files_to_load = {}
    sorted_indices = np.sort(selected_global_indices)

    # Walk the sorted global indices once, translating each into
    # (file, local index) using the cumulative per-file counts.
    current_file_idx = 0
    current_file_start = 0
    current_file_end = file_map[0]['count']

    for global_idx in sorted_indices:
        while global_idx >= current_file_end:
            current_file_idx += 1
            if current_file_idx >= len(file_map):
                break
            current_file_start = current_file_end
            current_file_end += file_map[current_file_idx]['count']

        if current_file_idx >= len(file_map):
            break
        local_idx = global_idx - current_file_start
        f_path = file_map[current_file_idx]['path']
        files_to_load.setdefault(f_path, []).append(local_idx)

    if not files_to_load:
        print(f"      [WARNING] No samples selected for {split} {resolution}px")
        return None, None

    sample_path = next(iter(files_to_load))
    with np.load(sample_path, allow_pickle=True) as data:
        img_shape = data['images'][0].shape

    print(f"      Allocating memory for {final_count} images of shape {img_shape}...")
    final_images_arr = np.empty((final_count, *img_shape), dtype=np.uint8)
    final_labels_arr = np.empty((final_count, *label_shape), dtype=np.int64)

    current_fill_idx = 0
    for f_path, local_indices in tqdm(files_to_load.items(), desc="Extracting", leave=False):
        try:
            with np.load(f_path, allow_pickle=True) as data:
                raw_imgs = data['images']
                levels_dict = data['levels'].item()
                raw_lbls = levels_dict[org_name]
                idx_arr = np.array(local_indices)
                batch_size = len(idx_arr)
                final_images_arr[current_fill_idx : current_fill_idx + batch_size] = raw_imgs[idx_arr]
                final_labels_arr[current_fill_idx : current_fill_idx + batch_size] = raw_lbls[idx_arr]
                current_fill_idx += batch_size
        except Exception as e:
            # Best effort: skip the chunk, but say so. The unfilled tail is
            # dropped below so uninitialised rows never reach the caller.
            print(f"      [ERROR] extracting from {f_path}: {e}")
            continue

    if current_fill_idx < final_count:
        print(f"      [WARNING] Only {current_fill_idx}/{final_count} samples could be extracted; dropping unfilled rows.")
        if current_fill_idx == 0:
            return None, None
        final_images_arr = final_images_arr[:current_fill_idx]
        final_labels_arr = final_labels_arr[:current_fill_idx]
        final_count = current_fill_idx

    # Rows are currently grouped by source file; shuffle them reproducibly.
    np.random.seed(seed)
    shuffle_idx = np.arange(final_count)
    np.random.shuffle(shuffle_idx)
    final_images_arr = final_images_arr[shuffle_idx]
    final_labels_arr = final_labels_arr[shuffle_idx]

    gc.collect()
    return final_images_arr, final_labels_arr

def prepare_and_cache_split(split, resolution, org_name, base_dir, drive_cache_dir, seed, target_size=None):
    """Make sure a valid local pickle cache exists for one split.

    Lookup order:
      1. an intact local cache file is reused;
      2. otherwise a cache in ``drive_cache_dir`` is copied locally and verified;
      3. otherwise the subset is generated with ``create_subset_two_pass``,
         pickled locally and backed up to ``drive_cache_dir`` (best effort).

    Failures of the Drive copy or backup are reported and do not abort the
    function.

    Args:
        split: Split name; part of the cache file name.
        resolution: Image resolution in pixels; part of the cache file name.
        org_name: Label key forwarded to ``create_subset_two_pass``.
        base_dir: Dataset root forwarded to ``create_subset_two_pass``.
        drive_cache_dir: ``Path`` of the persistent cache directory.
        seed: RNG seed forwarded to ``create_subset_two_pass``.
        target_size: Size tag forwarded to ``create_subset_two_pass``.
    """
    file_name = f"{split}_{resolution}.pkl"
    local_path = LOCAL_CACHE_DIR / file_name
    drive_path = drive_cache_dir / file_name

    if local_path.exists():
        if check_pickle_integrity(local_path):
            print(f"    [Found] Local cache for {split} {resolution}px")
            return
        local_path.unlink()

    if drive_path.exists():
        print(f"    [Found] Drive cache for {split} {resolution}px. Copying...")
        try:
            shutil.copy(drive_path, local_path)
            if check_pickle_integrity(local_path):
                return
            print(f"    [Warning] Drive cache for {split} {resolution}px is unusable; regenerating.")
        except Exception as e:
            print(f"    [Warning] Could not copy Drive cache for {split} {resolution}px: {e}")
        # Remove a partial or corrupt local copy so it is never mistaken for a cache.
        try:
            if local_path.exists():
                local_path.unlink()
        except OSError as e:
            print(f"    [Warning] Could not remove {local_path}: {e}")

    print(f"    [Create] Generating {split} {resolution}px data...")
    imgs, lbls = create_subset_two_pass(resolution, org_name, base_dir, split, target_size=target_size, seed=seed)

    if imgs is None:
        print(f"    [Warning] No data generated for {split} {resolution}px; cache not written.")
        return

    with open(local_path, 'wb') as f:
        pickle.dump({'images': imgs, 'labels': lbls}, f, protocol=4)
    try:
        shutil.copy(local_path, drive_path)
    except Exception as e:
        # The backup is optional: the local cache is already in place.
        print(f"    [Warning] Could not back up {file_name} to Drive: {e}")
    del imgs, lbls
    gc.collect()

def prepare_all_resolutions(org_name, base_dir, seed=42):
    """V5: pre-cache every split for the single fixed resolution (``RESOLUTION``).

    Creates the local cache directory and ``<base_dir>/dataset_cache`` when
    missing, then prepares 'train' (3x), 'val_balanced' (1x) and
    'val_imbalanced' (no size tag) in that order.

    Args:
        org_name: Label key forwarded to ``prepare_and_cache_split``.
        base_dir: Dataset root; the persistent cache lives beneath it.
        seed: RNG seed forwarded to ``prepare_and_cache_split``.
    """
    print(f"\n{'='*20} PRE-CACHING DATASET (V5 Fixed Res, Seed={seed}) {'='*20}")
    LOCAL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    drive_cache_dir = Path(base_dir) / 'dataset_cache'
    drive_cache_dir.mkdir(parents=True, exist_ok=True)

    res = RESOLUTION
    print(f"\n>> Checking Resolution: {res}px")
    # Each generated file holds as many samples as its size tag requests.
    split_plan = (
        ('train', '3x'),
        ('val_balanced', '1x'),
        ('val_imbalanced', None),
    )
    for split_name, size_tag in split_plan:
        prepare_and_cache_split(split_name, res, org_name, base_dir, drive_cache_dir, seed, target_size=size_tag)
    print(f"\n{'='*20} CACHING CHECK COMPLETE {'='*20}\n")

# --- プレロード関数 ---
def preload_data_to_memory(resolution):
    """Load the cached splits into ``_GLOBAL_DATA_CACHE`` before the Optuna run.

    Loading once up front keeps trials from reloading the data or allocating
    large buffers themselves. A split whose cache file is missing is reported
    and skipped; a split already held in memory is left as is; load errors are
    printed and do not abort the remaining splits.

    Args:
        resolution: Image resolution in pixels (part of the cache key and
            cache file name).
    """
    print(f"\n{'='*20} PRE-LOADING DATA TO MEMORY {'='*20}")
    for split in ('train', 'val_balanced'):  # add 'val_imbalanced' here if it is needed
        cache_key = f"{split}_{resolution}"
        cache_path = LOCAL_CACHE_DIR / f"{split}_{resolution}.pkl"

        if not cache_path.exists():
            print(f"[ERROR] Cache file not found for {split}! Run prepare_all_resolutions first.")
        elif cache_key in _GLOBAL_DATA_CACHE:
            print(f"[{split}] Already in memory.")
        else:
            print(f"[{split}] Loading from {cache_path} ...")
            try:
                with open(cache_path, 'rb') as handle:
                    data = pickle.load(handle)

                # Keep only the two arrays in the shared cache.
                _GLOBAL_DATA_CACHE[cache_key] = {name: data[name] for name in ('images', 'labels')}
                print(f"[{split}] Loaded. Shape: {data['images'].shape}")
                del data
                gc.collect()
            except Exception as e:
                print(f"[{split}] Error loading: {e}")

    print(f"{'='*20} PRE-LOAD COMPLETE {'='*20}\n")


class CachedBinaryDataset(Dataset):
    """Dataset view over the arrays held in ``_GLOBAL_DATA_CACHE``.

    The instance keeps references to the shared image/label arrays plus an
    index array, so building one dataset per trial does not duplicate the
    data. Labels are binarised on access (``label > 0`` -> 1, else 0).

    Args:
        split: Split name; together with ``resolution`` it forms the cache key.
        resolution: Image resolution in pixels.
        transform: Optional callable applied to the PIL image.
        dataset_size: '1x' / '2x' / '3x'. For the 'train' split, anything other
            than '3x' triggers index downsampling.
        seed: Seed used for the downsampling permutation.
    """

    def __init__(self, split, resolution, transform=None, dataset_size='1x', seed=42):
        self.transform = transform
        self.seed = seed
        cache_key = f"{split}_{resolution}"

        if cache_key not in _GLOBAL_DATA_CACHE:
            # Fallback for data that was not preloaded (not expected in normal use).
            print(f"[WARNING] {cache_key} not found in global cache. Loading now...")
            cache_path = LOCAL_CACHE_DIR / f"{split}_{resolution}.pkl"
            with open(cache_path, 'rb') as f:
                data = pickle.load(f)
            _GLOBAL_DATA_CACHE[cache_key] = {
                'images': data['images'],
                'labels': data['labels']
            }

        # References only; the arrays are not copied.
        self.images_ref = _GLOBAL_DATA_CACHE[cache_key]['images']
        self.labels_ref = _GLOBAL_DATA_CACHE[cache_key]['labels']

        # Indices into the shared arrays that this dataset exposes.
        total_len = len(self.images_ref)
        self.indices = np.arange(total_len)

        # Downsampling only thins out the index array.
        if split == 'train' and dataset_size != '3x':
            self._downsample_indices(dataset_size)

    def _downsample_indices(self, target_size):
        """Keep a seeded random subset of ``self.indices`` for '1x' / '2x'.

        '1x' keeps 20000 indices, '2x' keeps 40000; other tags, or a target
        that is not smaller than the available count, leave the indices
        unchanged. A private ``RandomState`` is used so NumPy's global RNG is
        not reseeded as a side effect; the chosen subset equals the one
        obtained with ``np.random.seed(self.seed)``.
        """
        total_available = len(self.indices)
        target_count = {'1x': 20000, '2x': 40000}.get(target_size)

        if target_count and target_count < total_available:
            rng = np.random.RandomState(self.seed)
            perm = rng.permutation(total_available)[:target_count]
            self.indices = self.indices[perm]

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(image, binary_labels)`` for position ``idx``."""
        # Map the dataset position to a row of the shared arrays.
        real_idx = self.indices[idx]

        # Converting to a PIL image is where the pixel data gets copied.
        img_arr = self.images_ref[real_idx]
        img = Image.fromarray(img_arr)

        if self.transform:
            img = self.transform(img)

        # Binarise the whole label vector at once: positive level -> 1.
        label_val = self.labels_ref[real_idx]
        binary_label = (label_val > 0).astype(np.int64)

        return img, torch.from_numpy(binary_label).long()

class MultiOutputEfficientNetBinary(nn.Module):
    """timm backbone with a head that emits 2-class logits for 9 outputs.

    Args:
        model_name: timm model name passed to ``timm.create_model``.
        dropout: Backbone ``drop_rate``.
        pretrained: Whether timm should load pretrained weights.
        head_layers: Number of linear layers in the head (>= 1). A value of 1
            gives a single ``nn.Linear``; ``n > 1`` gives ``n - 1`` hidden
            blocks (Linear -> BatchNorm1d -> SiLU -> Dropout) followed by the
            output Linear.
        head_hidden_dim: Width of the hidden head layers.
        head_dropout: Dropout probability inside the hidden head blocks.

    Raises:
        ValueError: If ``head_layers`` is not a positive integer.
    """

    NUM_OUTPUTS = 9  # independent binary outputs per sample
    NUM_CLASSES = 2  # logits per output

    def __init__(self, model_name, dropout=0.0, pretrained=True,
                 head_layers=1, head_hidden_dim=512, head_dropout=0.2):
        super().__init__()
        # Fail fast, before the backbone is built: an unsupported value would
        # otherwise leave the module without a head and only fail in forward().
        if int(head_layers) != head_layers or head_layers < 1:
            raise ValueError(f"head_layers must be a positive integer, got {head_layers!r}")
        head_layers = int(head_layers)

        self.base_model = timm.create_model(model_name, pretrained=pretrained, drop_rate=dropout, num_classes=0)
        in_features = self.base_model.num_features
        out_features = self.NUM_OUTPUTS * self.NUM_CLASSES

        if head_layers == 1:
            self.head = nn.Linear(in_features, out_features)
        else:
            # Layer order matches the previous hand-written 2- and 3-layer
            # heads, so Sequential indices (state_dict keys) stay the same.
            layers = []
            width = in_features
            for _ in range(head_layers - 1):
                layers.extend([
                    nn.Linear(width, head_hidden_dim),
                    nn.BatchNorm1d(head_hidden_dim),
                    nn.SiLU(),
                    nn.Dropout(head_dropout),
                ])
                width = head_hidden_dim
            layers.append(nn.Linear(width, out_features))
            self.head = nn.Sequential(*layers)

    def forward(self, x):
        """Return logits reshaped to ``(batch, 9, 2)``."""
        features = self.base_model(x)
        logits = self.head(features)
        return logits.view(-1, self.NUM_OUTPUTS, self.NUM_CLASSES)

# --- V5: EPOCH MONITOR CALLBACK ---
class EpochMonitorCallback:
    """Optuna callback that warns when many trials run for the full ``MAX_EPOCHS``.

    Once at least ``start_trial`` trials have completed, the callback computes
    the fraction of completed trials whose ``final_epoch`` user attribute is
    ``>= MAX_EPOCHS`` and prints a warning when that fraction exceeds
    ``threshold``.

    Args:
        start_trial: Minimum number of completed trials before checking.
        threshold: Fraction above which the warning is printed.
    """

    def __init__(self, start_trial=EPOCH_MONITOR_START, threshold=EPOCH_MONITOR_THRESHOLD):
        self.start_trial = start_trial
        self.threshold = threshold

    def __call__(self, study, trial):
        completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
        # The emptiness check prevents a division by zero when start_trial <= 0
        # and no trial has completed yet (e.g. the first trial was pruned).
        if not completed_trials or len(completed_trials) < self.start_trial:
            return

        max_epoch_count = sum(
            1 for t in completed_trials
            if 'final_epoch' in t.user_attrs and t.user_attrs['final_epoch'] >= MAX_EPOCHS
        )

        ratio = max_epoch_count / len(completed_trials)
        if ratio > self.threshold:
            print(f"\n[WARNING] {ratio*100:.1f}% of trials reached max epochs! Consider increasing MAX_EPOCHS.")

# --- V5: TRAIN FUNCTION (TPU Optimized: MpDeviceLoader) ---
def train_one_epoch(model, loader, criterion, optimizer, scheduler, device):
    """Train ``model`` for one pass over ``loader`` on an XLA device.

    Args:
        model: Network returning logits that can be viewed as ``(-1, 2)``.
        loader: DataLoader yielding ``(images, labels)`` batches.
        criterion: Loss applied to the flattened logits and labels.
        optimizer: Optimizer stepped through ``xm.optimizer_step``.
        scheduler: LR scheduler or None; it is stepped here, once per batch,
            only when it is a ``OneCycleLR``.
        device: Device handed to ``pl.MpDeviceLoader``.

    Returns:
        tuple: ``(mean loss per batch, accuracy in percent)`` with accuracy
        taken over all label elements at a 0.5 probability threshold, or
        ``(0.0, 0.0)`` when the loader produced no batch.
    """
    model.train()
    loss_sum = 0.0
    pred_chunks = []
    target_chunks = []

    # IMPORTANT: the device loader makes host-to-device transfer asynchronous.
    device_loader = pl.MpDeviceLoader(loader, device)
    # Only OneCycleLR advances per batch; isinstance(None, ...) is False.
    step_every_batch = isinstance(scheduler, optim.lr_scheduler.OneCycleLR)

    for batch_images, batch_labels in tqdm(device_loader, desc="Training", leave=False):
        optimizer.zero_grad(set_to_none=True)

        logits = model(batch_images)
        batch_loss = criterion(logits.view(-1, 2), batch_labels.view(-1))

        batch_loss.backward()
        xm.optimizer_step(optimizer)  # TPU barrier & parameter update

        if step_every_batch:
            scheduler.step()

        loss_sum += batch_loss.item()

        with torch.no_grad():
            positive_prob = torch.softmax(logits, dim=2)[:, :, 1]
            hard_preds = (positive_prob > 0.5).long()
            # Predictions are pulled to the host every step; batching these
            # transfers would reduce TPU traffic, but is left as is.
            pred_chunks.append(hard_preds.cpu().numpy())
            target_chunks.append(batch_labels.cpu().numpy())

    if not pred_chunks:
        return 0.0, 0.0

    flat_preds = np.concatenate(pred_chunks).flatten()
    flat_targets = np.concatenate(target_chunks).flatten()
    accuracy = (flat_preds == flat_targets).mean() * 100.0
    return loss_sum / len(loader), accuracy

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` and pick the F1-optimal threshold.

    Positive-class probabilities of all outputs are pooled (flattened) and a
    threshold sweep over ``np.arange(0.1, 0.95, 0.05)`` selects the threshold
    with the highest F1. Accuracy, precision and recall are reported at that
    threshold; when no threshold gives F1 > 0 the default threshold 0.5 is
    used, so accuracy still reflects the actual predictions.

    Args:
        model: Network returning logits of shape ``(batch, outputs, 2)``.
        loader: DataLoader yielding ``(images, labels)`` batches.
        criterion: Loss applied to the flattened logits and labels.
        device: Device handed to ``pl.MpDeviceLoader``.

    Returns:
        tuple: ``(avg_loss, accuracy_percent, precision, recall, best_f1,
        roc_auc, pr_auc, best_threshold)``. When the loader is empty, all
        metrics are 0.0 and the threshold is 0.5. ``roc_auc`` / ``pr_auc``
        fall back to 0.0 when they cannot be computed.
    """
    model.eval()
    running_loss = 0.0
    all_probs = []
    all_targets = []

    para_loader = pl.MpDeviceLoader(loader, device)

    with torch.no_grad():
        for images, labels in tqdm(para_loader, desc="Validation", leave=False):
            outputs = model(images)
            loss = criterion(outputs.view(-1, 2), labels.view(-1))
            running_loss += loss.item()
            probs = torch.softmax(outputs, dim=2)[:, :, 1]
            all_probs.append(probs.cpu().numpy())
            all_targets.append(labels.cpu().numpy())

    if len(all_probs) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5

    all_probs = np.concatenate(all_probs).flatten()
    all_targets = np.concatenate(all_targets).flatten()

    best_f1 = 0.0
    best_thresh = 0.5

    for th in np.arange(0.1, 0.95, 0.05):
        preds = (all_probs > th).astype(int)
        f1 = f1_score(all_targets, preds, zero_division=0)
        if f1 > best_f1:
            best_f1 = f1
            best_thresh = th

    # Derive the remaining metrics once, at the selected threshold.
    best_preds = (all_probs > best_thresh).astype(int)
    acc = (best_preds == all_targets).mean() * 100.0
    precision = precision_score(all_targets, best_preds, zero_division=0)
    recall = recall_score(all_targets, best_preds, zero_division=0)

    avg_loss = running_loss / len(loader)
    try:
        roc_auc = roc_auc_score(all_targets, all_probs)
        pr_auc = average_precision_score(all_targets, all_probs)
    except Exception as e:
        # Best effort (e.g. only one class present), but never swallow
        # KeyboardInterrupt / SystemExit the way a bare except would.
        print(f"[Warning] AUC metrics unavailable: {e}")
        roc_auc, pr_auc = 0.0, 0.0

    return avg_loss, acc, precision, recall, best_f1, roc_auc, pr_auc, best_thresh

# --- V5: OPTUNA OBJECTIVE ---
def objective(trial, device, seed):
    """Optuna objective: train one configuration and return its best validation F1.

    Samples the hyperparameters, builds the model, optimizer, datasets, loaders
    and scheduler, then trains for up to ``MAX_EPOCHS`` epochs with early
    stopping after ``PATIENCE`` epochs without improvement. The balanced
    validation F1 is reported to Optuna every epoch so the pruner can stop the
    trial.

    Per-trial resources (persistent DataLoader workers, model, optimizer) are
    released in a ``finally`` block, so pruned and failed trials are cleaned up
    as well as completed ones.

    Args:
        trial: ``optuna.Trial`` used for sampling and reporting.
        device: Device the model is moved to and batches are loaded onto.
        seed: Seed forwarded to the datasets.

    Returns:
        float: Best balanced-validation F1 observed during the trial.

    Raises:
        optuna.TrialPruned: When the cached data is missing or the pruner
            decides to stop the trial.
    """
    # --- V5 Hyperparameters ---
    dataset_size = trial.suggest_categorical('dataset_size', ['1x', '2x', '3x'])
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [64, 128])
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'AdamW', 'SGD'])
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)

    dropout = trial.suggest_float('dropout', 0.1, 0.4)
    freeze_ratio = trial.suggest_float('freeze_ratio', 0.0, 1.0)
    augment_magnitude = trial.suggest_int('augment_magnitude', 5, 15)
    scheduler_type = trial.suggest_categorical('scheduler', ['cosine', 'onecycle'])

    head_layers = trial.suggest_int('head_layers', 1, 3)
    head_hidden_dim = trial.suggest_categorical('head_hidden_dim', [512, 768, 1024, 1280])
    head_dropout = trial.suggest_float('head_dropout', 0.1, 0.5)

    print(f"\nTrial {trial.number}: size={dataset_size}, lr={lr:.2e}, batch={batch_size}, opt={optimizer_name}, "
          f"sched={scheduler_type}, mag={augment_magnitude}, drop={dropout:.2f}, freeze={freeze_ratio:.2f}")

    # Initialize Model
    model = MultiOutputEfficientNetBinary('tf_efficientnetv2_s', dropout=dropout, pretrained=True,
                                          head_layers=head_layers, head_hidden_dim=head_hidden_dim,
                                          head_dropout=head_dropout).to(device)
    freeze_layers(model, freeze_ratio)
    criterion = nn.CrossEntropyLoss()

    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif optimizer_name == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    else:
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)

    try:
        # The datasets reference the globally cached arrays, so building them
        # does not increase memory use.
        train_ds = CachedBinaryDataset('train', RESOLUTION, get_transforms(True, augment_magnitude), dataset_size, seed)
        val_bal_ds = CachedBinaryDataset('val_balanced', RESOLUTION, get_transforms(False), seed=seed)
    except FileNotFoundError:
        raise optuna.TrialPruned()

    num_workers = 4
    # persistent_workers=True is a common source of leaked worker memory.
    # It is kept enabled because the shared data cache is meant to solve the
    # crashes; if they come back, set it to False (or lower num_workers).
    train_loader = DataLoader(
        train_ds, batch_size, True,
        num_workers=num_workers, drop_last=True,
        persistent_workers=True, pin_memory=True
    )
    val_bal_loader = DataLoader(
        val_bal_ds, batch_size*2, False,
        num_workers=num_workers,
        persistent_workers=True, pin_memory=True
    )

    if scheduler_type == 'cosine':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=MAX_EPOCHS, eta_min=1e-6)
    else:
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr, epochs=MAX_EPOCHS, steps_per_epoch=len(train_loader))

    best_val_f1 = 0.0
    patience_counter = 0
    final_epoch = 0

    try:
        for epoch in range(MAX_EPOCHS):
            final_epoch = epoch + 1

            train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, scheduler, device)

            # OneCycleLR is stepped per batch inside train_one_epoch;
            # every other scheduler advances once per epoch here.
            if scheduler is not None and not isinstance(scheduler, optim.lr_scheduler.OneCycleLR):
                scheduler.step()

            vb_metrics = validate(model, val_bal_loader, criterion, device)
            vb_loss, vb_acc, vb_prec, vb_rec, vb_f1, vb_auc, vb_pr, vb_th = vb_metrics

            current_lr = optimizer.param_groups[0]['lr']
            print(f"Epoch {epoch+1}/{MAX_EPOCHS}: Loss:{train_loss:.4f} | LR:{current_lr:.2e} | ValF1:{vb_f1:.4f}(@{vb_th:.2f})")

            trial.report(vb_f1, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()

            if vb_f1 > best_val_f1:
                best_val_f1 = vb_f1
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= PATIENCE:
                print(f"  Early stopping at epoch {epoch+1}")
                break

        trial.set_user_attr('final_epoch', final_epoch)
    finally:
        # --- Release per-trial resources on every exit path ---
        # Pruned or failing trials leave through an exception; without this
        # block their loaders and model would stay referenced.
        del train_ds, val_bal_ds, train_loader, val_bal_loader, model, optimizer, scheduler
        xm.mark_step()  # flush the pending TPU graph
        gc.collect()  # Python-side garbage collection

    return best_val_f1

def main(argv=None):
    """Run (or resume) the Optuna hyper-parameter search for one organisation.

    The function parses CLI-style arguments, seeds the RNGs, prepares and
    pre-loads the cached dataset, opens (or creates) a SQLite-backed Optuna
    study and runs the remaining trials. When at least one trial completed,
    the best result is printed and written to a JSON file under
    ``<base_dir>/output``.

    Args:
        argv: Optional list of argument strings. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--org', type=str, required=True)
    parser.add_argument('--n_trials', type=int, default=100)
    parser.add_argument('--study_name', type=str, default=None)
    parser.add_argument('--base_dir', type=str, default='/content/drive/MyDrive/XRAIN/yano/20250601~20251020_dataset/')
    parser.add_argument('--seed', type=int, default=42)

    # parse_args(None) falls back to sys.argv[1:], so one call covers both cases.
    args = parser.parse_args(argv)

    set_seed(args.seed)
    study_name = args.study_name or f'effv2_s_{args.org}_v5_binary_fixed'
    print(f"Study: {study_name}, Seed: {args.seed}, Org: {args.org}, Res: {RESOLUTION}px (TPU MODE)")

    prepare_all_resolutions(args.org, args.base_dir, seed=args.seed)

    # Important: load the cached data into memory once, before any trial runs.
    preload_data_to_memory(RESOLUTION)

    device = xm.xla_device()
    print(f"Using Device: {device}")

    output_dir = Path(args.base_dir) / 'output'
    progress_dir = output_dir / 'progress' / 'binary_v5'
    progress_dir.mkdir(parents=True, exist_ok=True)

    db_path = progress_dir / f"{study_name}.db"
    storage_url = f"sqlite:///{db_path}"

    print(f"Loading study from: {db_path}")

    def _create_study(load_if_exists):
        # Single place that defines the study configuration, so the normal
        # path and the recovery path cannot drift apart.
        return optuna.create_study(
            study_name=study_name,
            storage=storage_url,
            load_if_exists=load_if_exists,
            direction='maximize',
            sampler=TPESampler(seed=args.seed),
            pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=10)
        )

    try:
        study = _create_study(load_if_exists=True)
        n_complete = sum(1 for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE)
        # len(study.trials) counts every stored trial (pruned, failed, ...),
        # so report it as the total and show the completed count separately.
        print(f"Study loaded. Total trials: {len(study.trials)} (completed: {n_complete})")
    except Exception as e:
        print(f"Error loading study: {e}")
        if db_path.exists():
            # Time-stamped name: a second failure must not overwrite the
            # backup taken by an earlier one.
            backup_path = db_path.with_name(f"{db_path.name}.{time.strftime('%Y%m%d_%H%M%S')}.bak")
            print(f"Backing up corrupted/locked DB to {backup_path} and creating new one...")
            shutil.move(str(db_path), str(backup_path))
        study = _create_study(load_if_exists=False)

    # The budget is measured against all stored trials, whatever their state.
    remaining = args.n_trials - len(study.trials)
    if remaining > 0:
        print(f"Resuming search for {remaining} more trials...")
        # gc_after_trial=True forces a garbage collection after every trial
        # (Optuna's default is False).
        study.optimize(lambda t: objective(t, device, args.seed),
                       n_trials=remaining,
                       callbacks=[EpochMonitorCallback()],
                       catch=(RuntimeError,),
                       gc_after_trial=True)
    else:
        print("All trials completed.")

    # study.best_* raises when no trial has completed, hence the guard.
    if any(t.state == optuna.trial.TrialState.COMPLETE for t in study.trials):
        print(f"Best F1: {study.best_value:.4f}")
        print(f"Best params: {study.best_params}")
        with open(output_dir / f'optuna_results_{args.org}_v5_binary_tpu.json', 'w') as f:
            json.dump({
                'org': args.org,
                'best_trial': study.best_trial.number,
                'best_f1': study.best_value,
                'best_params': study.best_params,
                'seed': args.seed
            }, f, indent=2)

if __name__ == '__main__':
    # Inside a Jupyter/Colab kernel sys.argv belongs to ipykernel, so a fixed
    # argument list is supplied there; a plain script run passes None and
    # lets main() parse the real command line.
    notebook_args = ['--org', 'JMA', '--seed', '42']
    main(notebook_args if 'ipykernel' in sys.modules else None)

Global seed set to: 42
Study: effv2_s_JMA_v5_binary_fixed, Seed: 42, Org: JMA, Res: 300px (TPU MODE)


>> Checking Resolution: 300px
    [Found] Drive cache for train 300px. Copying...
    [Found] Drive cache for val_balanced 300px. Copying...
    [Found] Drive cache for val_imbalanced 300px. Copying...



[train] Loading from /content/temp_dataset_cache/train_300.pkl ...
[train] Loaded. Shape: (60000, 300, 300, 3)
[val_balanced] Loading from /content/temp_dataset_cache/val_balanced_300.pkl ...
[val_balanced] Loaded. Shape: (20000, 300, 300, 3)



  device = xm.xla_device()


Using Device: xla:0
Loading study from: /content/drive/MyDrive/XRAIN/yano/20250601~20251020_dataset/output/progress/binary_v5/effv2_s_JMA_v5_binary_fixed.db


[I 2025-11-29 10:33:50,787] Using an existing study with name 'effv2_s_JMA_v5_binary_fixed' instead of creating a new one.


Study loaded. Completed trials: 89
Resuming search for 11 more trials...

Trial 89: size=3x, lr=5.66e-04, batch=128, opt=AdamW, sched=cosine, mag=5, drop=0.29, freeze=0.43


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/86.5M [00:00<?, ?B/s]



Epoch 1/100: Loss:0.4499 | LR:5.65e-04 | ValF1:0.7529(@0.40)




Epoch 2/100: Loss:0.4216 | LR:5.65e-04 | ValF1:0.7581(@0.40)




Epoch 3/100: Loss:0.4126 | LR:5.64e-04 | ValF1:0.7596(@0.40)




Epoch 4/100: Loss:0.4055 | LR:5.63e-04 | ValF1:0.7611(@0.40)




Epoch 5/100: Loss:0.4014 | LR:5.62e-04 | ValF1:0.7649(@0.40)




Epoch 6/100: Loss:0.3961 | LR:5.61e-04 | ValF1:0.7659(@0.40)




Epoch 7/100: Loss:0.3937 | LR:5.59e-04 | ValF1:0.7661(@0.40)




Epoch 8/100: Loss:0.3910 | LR:5.57e-04 | ValF1:0.7659(@0.35)




Epoch 9/100: Loss:0.3893 | LR:5.54e-04 | ValF1:0.7680(@0.40)




Epoch 10/100: Loss:0.3881 | LR:5.52e-04 | ValF1:0.7691(@0.40)


[I 2025-11-29 10:46:06,431] Trial 89 pruned. 


Epoch 11/100: Loss:0.3852 | LR:5.49e-04 | ValF1:0.7684(@0.40)

Trial 90: size=3x, lr=8.90e-04, batch=128, opt=AdamW, sched=cosine, mag=5, drop=0.24, freeze=0.45




Epoch 1/100: Loss:0.4429 | LR:8.90e-04 | ValF1:0.7539(@0.45)




Epoch 2/100: Loss:0.4125 | LR:8.89e-04 | ValF1:0.7598(@0.40)




Epoch 3/100: Loss:0.4046 | LR:8.88e-04 | ValF1:0.7628(@0.40)




Epoch 4/100: Loss:0.3975 | LR:8.86e-04 | ValF1:0.7660(@0.35)




Epoch 5/100: Loss:0.3920 | LR:8.84e-04 | ValF1:0.7670(@0.40)




Epoch 6/100: Loss:0.3884 | LR:8.82e-04 | ValF1:0.7679(@0.35)




Epoch 7/100: Loss:0.3821 | LR:8.79e-04 | ValF1:0.7681(@0.35)




Epoch 8/100: Loss:0.3815 | LR:8.76e-04 | ValF1:0.7709(@0.35)




Epoch 9/100: Loss:0.3792 | LR:8.72e-04 | ValF1:0.7681(@0.35)




Epoch 10/100: Loss:0.3768 | LR:8.68e-04 | ValF1:0.7709(@0.40)


[I 2025-11-29 10:55:25,068] Trial 90 pruned. 


Epoch 11/100: Loss:0.3744 | LR:8.64e-04 | ValF1:0.7700(@0.40)

Trial 91: size=3x, lr=1.89e-04, batch=128, opt=AdamW, sched=cosine, mag=5, drop=0.23, freeze=0.58




Epoch 1/100: Loss:0.4641 | LR:1.89e-04 | ValF1:0.7503(@0.40)




Epoch 2/100: Loss:0.4352 | LR:1.89e-04 | ValF1:0.7538(@0.45)




Epoch 3/100: Loss:0.4266 | LR:1.88e-04 | ValF1:0.7559(@0.45)




Epoch 4/100: Loss:0.4222 | LR:1.88e-04 | ValF1:0.7580(@0.45)




Epoch 5/100: Loss:0.4178 | LR:1.88e-04 | ValF1:0.7580(@0.45)




Epoch 6/100: Loss:0.4139 | LR:1.87e-04 | ValF1:0.7610(@0.45)




Epoch 7/100: Loss:0.4133 | LR:1.87e-04 | ValF1:0.7612(@0.45)




Epoch 8/100: Loss:0.4106 | LR:1.86e-04 | ValF1:0.7619(@0.45)




Epoch 9/100: Loss:0.4079 | LR:1.85e-04 | ValF1:0.7611(@0.45)




Epoch 10/100: Loss:0.4062 | LR:1.84e-04 | ValF1:0.7625(@0.40)


[I 2025-11-29 11:04:48,281] Trial 91 pruned. 


Epoch 11/100: Loss:0.4051 | LR:1.83e-04 | ValF1:0.7630(@0.45)

Trial 92: size=3x, lr=4.26e-03, batch=128, opt=AdamW, sched=cosine, mag=6, drop=0.32, freeze=0.34




Epoch 1/100: Loss:0.4392 | LR:4.26e-03 | ValF1:0.7582(@0.40)




Epoch 2/100: Loss:0.4133 | LR:4.25e-03 | ValF1:0.7603(@0.40)




Epoch 3/100: Loss:0.4069 | LR:4.25e-03 | ValF1:0.7587(@0.45)




Epoch 4/100: Loss:0.4045 | LR:4.24e-03 | ValF1:0.7640(@0.30)




Epoch 5/100: Loss:0.4044 | LR:4.23e-03 | ValF1:0.7632(@0.40)




Epoch 6/100: Loss:0.4128 | LR:4.22e-03 | ValF1:0.7662(@0.40)




Epoch 7/100: Loss:0.4338 | LR:4.21e-03 | ValF1:0.7518(@0.30)




Epoch 8/100: Loss:0.4363 | LR:4.19e-03 | ValF1:0.7402(@0.20)




Epoch 9/100: Loss:0.4360 | LR:4.17e-03 | ValF1:0.7570(@0.30)




Epoch 10/100: Loss:0.4340 | LR:4.15e-03 | ValF1:0.7610(@0.30)


[I 2025-11-29 11:14:14,506] Trial 92 pruned. 


Epoch 11/100: Loss:0.4413 | LR:4.13e-03 | ValF1:0.7460(@0.45)

Trial 93: size=1x, lr=2.52e-04, batch=128, opt=AdamW, sched=cosine, mag=5, drop=0.28, freeze=0.41




Epoch 1/100: Loss:0.4829 | LR:2.52e-04 | ValF1:0.7417(@0.45)




Epoch 2/100: Loss:0.4450 | LR:2.52e-04 | ValF1:0.7478(@0.50)




Epoch 3/100: Loss:0.4424 | LR:2.52e-04 | ValF1:0.7494(@0.40)




Epoch 4/100: Loss:0.4329 | LR:2.51e-04 | ValF1:0.7503(@0.45)




Epoch 5/100: Loss:0.4271 | LR:2.51e-04 | ValF1:0.7490(@0.40)




Epoch 6/100: Loss:0.4240 | LR:2.50e-04 | ValF1:0.7512(@0.45)




Epoch 7/100: Loss:0.4199 | LR:2.49e-04 | ValF1:0.7530(@0.40)




Epoch 8/100: Loss:0.4174 | LR:2.48e-04 | ValF1:0.7541(@0.40)




Epoch 9/100: Loss:0.4124 | LR:2.47e-04 | ValF1:0.7537(@0.45)




Epoch 10/100: Loss:0.4108 | LR:2.46e-04 | ValF1:0.7525(@0.45)


[I 2025-11-29 11:22:02,349] Trial 93 pruned. 


Epoch 11/100: Loss:0.4060 | LR:2.45e-04 | ValF1:0.7511(@0.40)

Trial 94: size=3x, lr=1.12e-03, batch=128, opt=Adam, sched=cosine, mag=6, drop=0.25, freeze=0.63




Epoch 1/100: Loss:0.4485 | LR:1.12e-03 | ValF1:0.7580(@0.40)




Epoch 2/100: Loss:0.4168 | LR:1.12e-03 | ValF1:0.7589(@0.40)




Epoch 3/100: Loss:0.4071 | LR:1.12e-03 | ValF1:0.7650(@0.40)




Epoch 4/100: Loss:0.3995 | LR:1.12e-03 | ValF1:0.7645(@0.35)




Epoch 5/100: Loss:0.3954 | LR:1.12e-03 | ValF1:0.7642(@0.40)




Epoch 6/100: Loss:0.3939 | LR:1.11e-03 | ValF1:0.7663(@0.35)




Epoch 7/100: Loss:0.3896 | LR:1.11e-03 | ValF1:0.7676(@0.35)




Epoch 8/100: Loss:0.3859 | LR:1.11e-03 | ValF1:0.7692(@0.40)




Epoch 9/100: Loss:0.3837 | LR:1.10e-03 | ValF1:0.7671(@0.35)




Epoch 10/100: Loss:0.3817 | LR:1.10e-03 | ValF1:0.7689(@0.35)


[I 2025-11-29 11:34:57,337] Trial 94 pruned. 


Epoch 11/100: Loss:0.3810 | LR:1.09e-03 | ValF1:0.7694(@0.35)

Trial 95: size=3x, lr=7.49e-04, batch=128, opt=AdamW, sched=cosine, mag=5, drop=0.27, freeze=0.29




Epoch 1/100: Loss:0.4453 | LR:7.49e-04 | ValF1:0.7550(@0.45)




Epoch 2/100: Loss:0.4180 | LR:7.48e-04 | ValF1:0.7610(@0.45)




Epoch 3/100: Loss:0.4074 | LR:7.47e-04 | ValF1:0.7611(@0.35)




Epoch 4/100: Loss:0.4021 | LR:7.46e-04 | ValF1:0.7642(@0.35)




Epoch 5/100: Loss:0.3954 | LR:7.44e-04 | ValF1:0.7645(@0.35)




Epoch 6/100: Loss:0.3913 | LR:7.42e-04 | ValF1:0.7687(@0.40)




Epoch 7/100: Loss:0.3876 | LR:7.40e-04 | ValF1:0.7674(@0.35)




Epoch 8/100: Loss:0.3847 | LR:7.37e-04 | ValF1:0.7672(@0.35)




Epoch 9/100: Loss:0.3826 | LR:7.34e-04 | ValF1:0.7696(@0.40)




Epoch 10/100: Loss:0.3815 | LR:7.30e-04 | ValF1:0.7684(@0.35)


[I 2025-11-29 11:44:47,590] Trial 95 pruned. 


Epoch 11/100: Loss:0.3797 | LR:7.27e-04 | ValF1:0.7694(@0.40)

Trial 96: size=3x, lr=4.45e-04, batch=128, opt=Adam, sched=cosine, mag=6, drop=0.33, freeze=0.60




Epoch 1/100: Loss:0.4628 | LR:4.45e-04 | ValF1:0.7504(@0.45)




Epoch 2/100: Loss:0.4327 | LR:4.45e-04 | ValF1:0.7543(@0.45)




Epoch 3/100: Loss:0.4234 | LR:4.45e-04 | ValF1:0.7567(@0.45)




Epoch 4/100: Loss:0.4185 | LR:4.44e-04 | ValF1:0.7632(@0.40)




Epoch 5/100: Loss:0.4110 | LR:4.43e-04 | ValF1:0.7598(@0.45)




Epoch 6/100: Loss:0.4091 | LR:4.42e-04 | ValF1:0.7635(@0.40)




Epoch 7/100: Loss:0.4075 | LR:4.40e-04 | ValF1:0.7638(@0.45)




Epoch 8/100: Loss:0.4036 | LR:4.39e-04 | ValF1:0.7640(@0.45)




Epoch 9/100: Loss:0.4005 | LR:4.37e-04 | ValF1:0.7649(@0.40)




Epoch 10/100: Loss:0.4014 | LR:4.35e-04 | ValF1:0.7658(@0.40)


[I 2025-11-29 11:57:47,685] Trial 96 pruned. 


Epoch 11/100: Loss:0.3978 | LR:4.32e-04 | ValF1:0.7653(@0.35)

Trial 97: size=3x, lr=3.26e-04, batch=64, opt=AdamW, sched=cosine, mag=9, drop=0.31, freeze=0.46




Epoch 1/100: Loss:0.4629 | LR:3.26e-04 | ValF1:0.7491(@0.45)




Epoch 2/100: Loss:0.4403 | LR:3.25e-04 | ValF1:0.7532(@0.40)




Epoch 3/100: Loss:0.4300 | LR:3.25e-04 | ValF1:0.7557(@0.45)




Epoch 4/100: Loss:0.4250 | LR:3.24e-04 | ValF1:0.7580(@0.40)




Epoch 5/100: Loss:0.4228 | LR:3.24e-04 | ValF1:0.7600(@0.45)




Epoch 6/100: Loss:0.4197 | LR:3.23e-04 | ValF1:0.7581(@0.45)




Epoch 7/100: Loss:0.4175 | LR:3.22e-04 | ValF1:0.7613(@0.45)




Epoch 8/100: Loss:0.4155 | LR:3.21e-04 | ValF1:0.7637(@0.40)




Epoch 9/100: Loss:0.4136 | LR:3.19e-04 | ValF1:0.7638(@0.40)




Epoch 10/100: Loss:0.4142 | LR:3.18e-04 | ValF1:0.7643(@0.45)


[I 2025-11-29 12:16:16,058] Trial 97 pruned. 


Epoch 11/100: Loss:0.4100 | LR:3.16e-04 | ValF1:0.7645(@0.45)

Trial 98: size=3x, lr=6.20e-04, batch=128, opt=SGD, sched=onecycle, mag=5, drop=0.29, freeze=0.53




Epoch 1/100: Loss:0.7209 | LR:2.64e-05 | ValF1:0.5326(@0.15)




Epoch 2/100: Loss:0.7198 | LR:3.13e-05 | ValF1:0.5325(@0.15)




Epoch 3/100: Loss:0.7179 | LR:3.94e-05 | ValF1:0.5326(@0.20)




Epoch 4/100: Loss:0.7154 | LR:5.05e-05 | ValF1:0.5326(@0.15)




Epoch 5/100: Loss:0.7132 | LR:6.47e-05 | ValF1:0.5326(@0.20)




Epoch 6/100: Loss:0.7102 | LR:8.16e-05 | ValF1:0.5327(@0.25)




Epoch 7/100: Loss:0.7057 | LR:1.01e-04 | ValF1:0.5328(@0.25)




Epoch 8/100: Loss:0.7004 | LR:1.23e-04 | ValF1:0.5331(@0.25)




Epoch 9/100: Loss:0.6964 | LR:1.47e-04 | ValF1:0.5340(@0.30)




Epoch 10/100: Loss:0.6913 | LR:1.74e-04 | ValF1:0.5347(@0.30)


[I 2025-11-29 12:28:59,176] Trial 98 pruned. 


Epoch 11/100: Loss:0.6867 | LR:2.01e-04 | ValF1:0.5369(@0.35)

Trial 99: size=3x, lr=1.20e-04, batch=128, opt=AdamW, sched=cosine, mag=5, drop=0.17, freeze=0.37




Epoch 1/100: Loss:0.4830 | LR:1.20e-04 | ValF1:0.7426(@0.50)




Epoch 2/100: Loss:0.4455 | LR:1.20e-04 | ValF1:0.7498(@0.45)




Epoch 3/100: Loss:0.4367 | LR:1.20e-04 | ValF1:0.7515(@0.45)




Epoch 4/100: Loss:0.4323 | LR:1.20e-04 | ValF1:0.7538(@0.45)




Epoch 5/100: Loss:0.4277 | LR:1.19e-04 | ValF1:0.7540(@0.50)




Epoch 6/100: Loss:0.4268 | LR:1.19e-04 | ValF1:0.7559(@0.45)




Epoch 7/100: Loss:0.4225 | LR:1.19e-04 | ValF1:0.7569(@0.45)




Epoch 8/100: Loss:0.4213 | LR:1.18e-04 | ValF1:0.7563(@0.45)




Epoch 9/100: Loss:0.4204 | LR:1.18e-04 | ValF1:0.7574(@0.45)




Epoch 10/100: Loss:0.4169 | LR:1.17e-04 | ValF1:0.7583(@0.45)


[I 2025-11-29 12:38:53,684] Trial 99 pruned. 


Epoch 11/100: Loss:0.4164 | LR:1.17e-04 | ValF1:0.7590(@0.45)
Best F1: 0.7773
Best params: {'dataset_size': '3x', 'lr': 0.0003816070082609463, 'batch_size': 128, 'optimizer': 'AdamW', 'weight_decay': 3.1752459999560188e-06, 'dropout': 0.16571637839896394, 'freeze_ratio': 0.519121762590083, 'augment_magnitude': 6, 'scheduler': 'cosine', 'head_layers': 3, 'head_hidden_dim': 512, 'head_dropout': 0.1967610726584872}


In [None]:
!pip install optuna timm

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting timm
  Downloading timm-1.0.22-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.17.2-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.5 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (4.1 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
#!/usr/bin/env python3
"""
ViT-Small Multi-Output Binary Classification with Optuna (TPU Optimized Version)
Modified for Google Cloud TPU (v6e/v5p etc.) using torch_xla with Mixed Precision (BF16)

[Fixes & Improvements]
1. Explicit xm.mark_step() in validation loop to prevent OOM (Graph explosion).
2. Pre-resizing images to 224px during caching to save memory and CPU load.
3. persistent_workers=False to prevent zombie processes in Optuna trials.
4. Global Data Caching to minimize disk I/O.
"""

import os
# --- 重要: TPUでの混合精度(Mixed Precision)を有効化 ---
os.environ['XLA_USE_BF16'] = '1'

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import timm
import numpy as np
import pickle
from pathlib import Path
from tqdm import tqdm
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
import argparse
import json
from PIL import Image
import io
import sys
import time
import gc
import shutil
import random
import sqlite3
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

# --- TPU IMPORTS ---
try:
    import torch_xla.core.xla_model as xm
    import torch_xla.distributed.parallel_loader as pl
    import torch_xla.utils.utils as xu
    import torch_xla
    HAS_XLA = True
except ImportError:
    print("[Warning] torch_xla not found. Please ensure you are running on a TPU runtime.")
    HAS_XLA = False

# --- GLOBAL CONFIG ---
# Local directory where the pickled dataset caches are written and read.
LOCAL_CACHE_DIR = Path('/content/temp_dataset_cache_vit')
# ViT standard resolution
RESOLUTION = 224
# EpochMonitorCallback compares each trial's 'final_epoch' against this value.
# NOTE(review): presumably also the per-trial epoch cap of the training loop — confirm.
MAX_EPOCHS = 100
# NOTE(review): presumably the early-stopping patience; its use is outside this section — confirm.
PATIENCE = 5
# Defaults of EpochMonitorCallback: minimum number of completed trials before
# it checks anything, and the fraction of trials that triggers its warning.
EPOCH_MONITOR_START = 15
EPOCH_MONITOR_THRESHOLD = 0.5

# --- GLOBAL DATA CACHE (shared by reference to save memory) ---
# Maps "<split>_<resolution>_vit" to {'images': ..., 'labels': ...}.
_GLOBAL_DATA_CACHE = {}

def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and export PYTHONHASHSEED.

    Args:
        seed: Integer seed applied to every generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    os.environ.update(PYTHONHASHSEED=str(seed))
    print(f"Global seed set to: {seed}")

# --- TRANSFORMS (no Resize here: images are resized in advance, when the cache is built) ---
def get_transforms(augment=True, magnitude=10):
    """Build the torchvision preprocessing pipeline.

    Args:
        augment: When true, RandAugment (2 ops) and a random horizontal flip
            are applied before tensor conversion.
        magnitude: RandAugment magnitude; only used when ``augment`` is true.

    Returns:
        A ``transforms.Compose`` that ends with ``ToTensor`` and ``Normalize``.
        No resize step is included.
    """
    pipeline = []
    if augment:
        pipeline.append(transforms.RandAugment(num_ops=2, magnitude=magnitude))
        pipeline.append(transforms.RandomHorizontalFlip())
    pipeline.extend([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    return transforms.Compose(pipeline)

# --- HELPER: FREEZE ViT LAYERS ---
def freeze_vit_layers(model, freeze_ratio=0.0):
    """Freeze the embeddings and the first Transformer blocks of a ViT backbone.

    Args:
        model: Module exposing the backbone as ``model.base_model``.
        freeze_ratio: Fraction of ``base_model.blocks`` to freeze, counted
            from the first block. Values ``<= 0`` make every parameter of
            ``model`` trainable; values above 1 freeze all blocks.

    Returns:
        None. ``requires_grad`` flags are modified in place.
    """
    if freeze_ratio <= 0.0:
        for param in model.parameters():
            param.requires_grad = True
        return

    backbone = model.base_model

    # Embedding layers. The attributes are tested against None (not just with
    # hasattr) because a backbone may define cls_token / pos_embed as None.
    patch_embed = getattr(backbone, 'patch_embed', None)
    if patch_embed is not None:
        for param in patch_embed.parameters():
            param.requires_grad = False
    for attr_name in ('pos_embed', 'cls_token'):
        embedding = getattr(backbone, attr_name, None)
        if embedding is not None:
            embedding.requires_grad = False

    # Transformer blocks
    blocks = getattr(backbone, 'blocks', None)
    if blocks is not None:
        num_blocks = len(blocks)
        # Clamp so that a ratio above 1.0 cannot index past the last block.
        num_freeze = min(int(num_blocks * freeze_ratio), num_blocks)
        if num_freeze > 0:
            print(f"  Freezing {num_freeze}/{num_blocks} Transformer blocks")
            for i in range(num_freeze):
                for param in blocks[i].parameters():
                    param.requires_grad = False

# --- INTEGRITY CHECK ---
def check_pickle_integrity(path):
    """Return True when ``path`` exists and can be unpickled without error.

    The file is fully unpickled and the result discarded; any exception is
    reported on stdout and turned into ``False``.

    Args:
        path: ``pathlib.Path`` of the pickle file to verify.
    """
    if path.exists():
        try:
            with path.open('rb') as fh:
                pickle.load(fh)
        except Exception as e:
            print(f"    [Corrupt] File is broken: {path} ({e})")
            return False
        return True
    return False

# --- DATA LOADING FUNCTIONS ---
def get_file_list(base_dir, split, resolution):
    """List the NPZ chunk files of ``split`` from the first available source folder.

    Source folders are probed in a fixed priority order (224, 300, 128, 185,
    242 px) regardless of ``resolution``; ``resolution`` only appears in the
    log message.

    Args:
        base_dir: Dataset root containing ``npz_datasets_v4``.
        split: ``'train'``, ``'val_balanced'`` or any other split prefix.
        resolution: Target resolution, used for logging only.

    Returns:
        A list of paths, grouped by glob pattern and sorted within each
        pattern; an empty list when no source folder exists.
    """
    root = Path(base_dir) / 'npz_datasets_v4'
    found = next(
        (
            (px, root / f'npz_datasets_v4_universal_{px}px')
            for px in (224, 300, 128, 185, 242)
            if (root / f'npz_datasets_v4_universal_{px}px').exists()
        ),
        None,
    )
    if found is None:
        return []

    res, data_dir = found
    print(f"      [Found] Source data at {res}px (will resize to {resolution}px)")

    # Balanced splits are stored as separate rain / clear chunk families.
    split_patterns = {
        'train': ['train_rain_*.npz', 'train_clear_*.npz'],
        'val_balanced': ['val_balanced_rain_*.npz', 'val_balanced_clear_*.npz'],
    }
    patterns = split_patterns.get(split, [f'{split}_*.npz'])

    return [match for pat in patterns for match in sorted(data_dir.glob(pat))]

def create_subset_two_pass(resolution, org_name, base_dir, split, target_size='3x', seed=42):
    """Build an in-memory, resized subset of one dataset split in two passes.

    Pass 1 reads only the label arrays of every chunk to decide which samples
    to keep. Pass 2 opens just the chunks that contain selected samples,
    resizes each image to ``resolution`` x ``resolution`` and stores it in a
    pre-allocated array.

    Args:
        resolution: Side length (pixels) of the output images.
        org_name: Key of the label set to extract from each chunk's
            ``levels`` dictionary.
        base_dir: Dataset root passed on to ``get_file_list``.
        split: ``'train'``, ``'val_balanced'`` or ``'val_imbalanced'``.
        target_size: ``'1x'``, ``'2x'`` or ``'3x'`` (20k / 40k / 60k samples);
            anything else behaves like ``'1x'``. Ignored for the validation
            splits.
        seed: Seed applied to the global NumPy and Python RNGs.

    Returns:
        ``(images, labels)`` where ``images`` is a uint8 array of shape
        ``(N, resolution, resolution, 3)`` and ``labels`` an int64 array of
        shape ``(N, 9)``; ``(None, None)`` when nothing could be loaded.
    """
    np.random.seed(seed)
    random.seed(seed)

    files = get_file_list(base_dir, split, resolution)
    if not files:
        print(f"      [WARNING] No files found for {split}")
        return None, None

    # --- PASS 1: SCAN METADATA ---
    print(f"      [Pass 1] Scanning {split} labels...")
    file_map = []
    all_univ_levels = []
    all_org_labels = []

    for chunk_file in tqdm(files, desc="Scanning Labels", leave=False):
        try:
            with np.load(chunk_file, allow_pickle=True) as data:
                levels_dict = data['levels'].item()
                if org_name not in levels_dict: continue

                lbls = levels_dict[org_name]
                univ = levels_dict['Universal']
                file_map.append({'path': chunk_file, 'count': len(lbls)})
                all_org_labels.append(lbls)
                all_univ_levels.append(univ)
        except Exception as e:
            # Best effort: an unreadable chunk is skipped, but no longer silently.
            print(f"      [WARNING] Skipping unreadable chunk {chunk_file}: {e}")
            continue

    if not all_org_labels: return None, None

    full_org_labels = np.concatenate(all_org_labels, axis=0)
    full_univ_levels = np.concatenate(all_univ_levels, axis=0)
    total_samples = len(full_org_labels)

    # --- Selection Logic ---
    base_count = 10000
    if target_size == '1x': target_count = base_count * 2
    elif target_size == '2x': target_count = base_count * 4
    elif target_size == '3x': target_count = base_count * 6
    else: target_count = base_count * 2

    if split == 'val_balanced': target_count = 20000

    if split == 'val_imbalanced':
        # The imbalanced validation split always keeps every sample.
        selected_global_indices = np.arange(total_samples)
    else:
        # Balanced splits: half "rain" (any Universal level > 0), half "clear".
        target_per_class = target_count // 2
        max_univ = full_univ_levels.max(axis=1)
        rain_indices = np.where(max_univ > 0)[0]
        clear_indices = np.where(max_univ == 0)[0]

        # A class with too few samples is topped up by sampling with replacement.
        if len(rain_indices) >= target_per_class:
            sel_rain = np.random.choice(rain_indices, target_per_class, replace=False)
        else:
            sel_rain = np.concatenate([rain_indices, np.random.choice(rain_indices, target_per_class - len(rain_indices), replace=True)])

        if len(clear_indices) >= target_per_class:
            sel_clear = np.random.choice(clear_indices, target_per_class, replace=False)
        else:
            sel_clear = np.concatenate([clear_indices, np.random.choice(clear_indices, target_per_class - len(clear_indices), replace=True)])

        selected_global_indices = np.concatenate([sel_rain, sel_clear])

    np.random.shuffle(selected_global_indices)
    final_count = len(selected_global_indices)
    print(f"      Selected {final_count} samples (Target: {target_size})")

    del full_org_labels, full_univ_levels, all_org_labels, all_univ_levels
    gc.collect()

    # --- PASS 2: EXTRACT & RESIZE IMAGES ---
    print(f"      [Pass 2] Extracting and RESIZING to {resolution}px...")

    # Translate global sample indices into per-file local indices by walking
    # the sorted indices and the cumulative file sizes together.
    files_to_load = {}
    sorted_indices = np.sort(selected_global_indices)
    current_file_idx = 0
    current_file_start = 0
    current_file_end = file_map[0]['count']

    for global_idx in sorted_indices:
        while global_idx >= current_file_end:
            current_file_idx += 1
            if current_file_idx >= len(file_map): break
            current_file_start = current_file_end
            current_file_end += file_map[current_file_idx]['count']
        if current_file_idx >= len(file_map): break

        local_idx = global_idx - current_file_start
        f_path = file_map[current_file_idx]['path']
        files_to_load.setdefault(f_path, []).append(local_idx)

    # Allocate Memory (Fixed to RESOLUTION)
    img_shape = (resolution, resolution, 3)
    print(f"      Allocating memory for {final_count} images of shape {img_shape}...")

    final_images_arr = np.empty((final_count, *img_shape), dtype=np.uint8)
    final_labels_arr = np.empty((final_count, 9), dtype=np.int64)

    current_fill_idx = 0

    for f_path, local_indices in tqdm(files_to_load.items(), desc="Processing", leave=False):
        try:
            with np.load(f_path, allow_pickle=True) as data:
                raw_imgs = data['images']
                levels_dict = data['levels'].item()
                raw_lbls = levels_dict[org_name]

                idx_arr = np.array(local_indices)

                # Batch processing with individual resize
                batch_raw = raw_imgs[idx_arr]
                batch_lbls = raw_lbls[idx_arr]

                resized_batch = []
                for i in range(len(batch_raw)):
                    # Resize logic: Numpy -> PIL -> Resize -> Numpy
                    img = Image.fromarray(batch_raw[i])
                    if img.size != (resolution, resolution):
                        img = img.resize((resolution, resolution), Image.BICUBIC)
                    resized_batch.append(np.array(img))

                batch_size = len(resized_batch)
                final_images_arr[current_fill_idx : current_fill_idx + batch_size] = np.array(resized_batch)
                final_labels_arr[current_fill_idx : current_fill_idx + batch_size] = batch_lbls

                # Advance only after both writes succeeded, so a failed chunk
                # is simply overwritten by the next one.
                current_fill_idx += batch_size

        except Exception as e:
            print(f"Error processing {f_path}: {e}")
            continue

    # If any chunk failed, the tail of the np.empty buffers was never written.
    # Drop it so uninitialised rows cannot leak into the returned dataset.
    if current_fill_idx < final_count:
        print(f"      [WARNING] Only {current_fill_idx}/{final_count} samples were extracted; dropping unfilled rows.")
        final_images_arr = final_images_arr[:current_fill_idx]
        final_labels_arr = final_labels_arr[:current_fill_idx]
        final_count = current_fill_idx
    if final_count == 0:
        return None, None

    # Final shuffle (rows were filled in file order)
    np.random.seed(seed)
    shuffle_idx = np.arange(final_count)
    np.random.shuffle(shuffle_idx)
    final_images_arr = final_images_arr[shuffle_idx]
    final_labels_arr = final_labels_arr[shuffle_idx]

    gc.collect()
    return final_images_arr, final_labels_arr

def prepare_and_cache_split(split, resolution, org_name, base_dir, drive_cache_dir, seed, target_size=None):
    """Make sure the pickled cache of one split exists in the local cache directory.

    Lookup order: a valid local cache, then a valid copy on Drive, then a
    fresh build through ``create_subset_two_pass`` (which is also copied back
    to Drive on a best-effort basis).

    Args:
        split: Split name, used in the cache file name.
        resolution: Image side length, used in the cache file name.
        org_name: Label set forwarded to ``create_subset_two_pass``.
        base_dir: Dataset root forwarded to ``create_subset_two_pass``.
        drive_cache_dir: ``pathlib.Path`` of the persistent cache folder.
        seed: Seed forwarded to ``create_subset_two_pass``.
        target_size: Subset size tag forwarded to ``create_subset_two_pass``.

    Returns:
        None.
    """
    file_name = f"{split}_{resolution}_vit.pkl"
    local_path = LOCAL_CACHE_DIR / file_name
    drive_path = drive_cache_dir / file_name

    if local_path.exists():
        if check_pickle_integrity(local_path):
            print(f"    [Found] Local cache for {split} {resolution}px")
            return
        local_path.unlink()

    if drive_path.exists():
        print(f"    [Found] Drive cache for {split}. Copying...")
        try:
            shutil.copy(drive_path, local_path)
            if check_pickle_integrity(local_path):
                return
            local_path.unlink()
        except Exception as e:
            # Best effort: fall through to regeneration, but say why and do
            # not leave a partially copied file behind.
            print(f"    [WARNING] Could not reuse Drive cache for {split}: {e}")
            if local_path.exists():
                local_path.unlink()

    print(f"    [Create] Generating {split} {resolution}px data...")
    imgs, lbls = create_subset_two_pass(resolution, org_name, base_dir, split, target_size=target_size, seed=seed)

    if imgs is None:
        print(f"    [WARNING] No data generated for {split}; cache not written.")
        return

    with open(local_path, 'wb') as f:
        pickle.dump({'images': imgs, 'labels': lbls}, f, protocol=4)
    try:
        shutil.copy(local_path, drive_path)
    except Exception as e:
        # The local cache is still usable; only persistence to Drive failed.
        print(f"    [WARNING] Could not copy {split} cache to Drive: {e}")
    del imgs, lbls
    gc.collect()

def prepare_all_resolutions(org_name, base_dir, seed=42):
    """Create the cache folders and cache the train and val_balanced splits.

    Args:
        org_name: Label set forwarded to ``prepare_and_cache_split``.
        base_dir: Dataset root; the Drive cache lives in
            ``<base_dir>/dataset_cache_vit``.
        seed: Seed forwarded to ``prepare_and_cache_split``.
    """
    print(f"\n{'='*20} PRE-CACHING DATASET (ViT {RESOLUTION}px) {'='*20}")

    drive_cache_dir = Path(base_dir) / 'dataset_cache_vit'
    for folder in (LOCAL_CACHE_DIR, drive_cache_dir):
        folder.mkdir(parents=True, exist_ok=True)

    # Only these two splits are cached; val_imbalanced is not prepared here.
    for split_name, size_tag in (('train', '3x'), ('val_balanced', '1x')):
        prepare_and_cache_split(split_name, RESOLUTION, org_name, base_dir,
                                drive_cache_dir, seed, target_size=size_tag)

    print(f"\n{'='*20} CACHING COMPLETE {'='*20}\n")

def preload_data_to_memory(resolution):
    """Load the cached train and val_balanced pickles into ``_GLOBAL_DATA_CACHE``.

    Splits whose cache file is missing, or which are already in the global
    cache, are skipped. A load failure is printed and does not abort the
    remaining splits.

    Args:
        resolution: Resolution used in the cache key and file name.
    """
    print(f"\n{'='*20} PRE-LOADING DATA TO MEMORY {'='*20}")

    for split in ('train', 'val_balanced'):
        cache_key = f"{split}_{resolution}_vit"
        cache_path = LOCAL_CACHE_DIR / f"{split}_{resolution}_vit.pkl"

        already_loaded = cache_key in _GLOBAL_DATA_CACHE
        if already_loaded or not cache_path.exists():
            continue

        print(f"[{split}] Loading {cache_path} ...")
        try:
            with open(cache_path, 'rb') as fh:
                data = pickle.load(fh)
            # Keep only the two arrays, referenced rather than copied.
            _GLOBAL_DATA_CACHE[cache_key] = {field: data[field] for field in ('images', 'labels')}
            print(f"[{split}] Loaded. Shape: {data['images'].shape}")
            del data
            gc.collect()
        except Exception as e:
            print(f"[{split}] Error: {e}")

    print(f"{'='*20} LOAD COMPLETE {'='*20}\n")


class CachedBinaryDataset(Dataset):
    """Dataset view over the arrays held in ``_GLOBAL_DATA_CACHE``.

    Instances keep references to the shared image/label arrays plus an index
    array, so several datasets can share one copy of the data. Labels are
    binarised (``> 0``) on access.
    """

    def __init__(self, split, resolution, transform=None, dataset_size='1x', seed=42):
        """Attach to the cached split, loading it from disk if it is not cached yet.

        Args:
            split: Split name used in the cache key.
            resolution: Resolution used in the cache key.
            transform: Optional callable applied to each PIL image.
            dataset_size: ``'1x'``/``'2x'`` subsample the train split;
                ``'3x'`` keeps everything. Ignored for other splits.
            seed: Seed used when subsampling.
        """
        self.transform = transform
        self.seed = seed

        cache_key = f"{split}_{resolution}_vit"
        if cache_key not in _GLOBAL_DATA_CACHE:
            # Fallback: the split was not pre-loaded, read it from the local cache.
            cache_path = LOCAL_CACHE_DIR / f"{split}_{resolution}_vit.pkl"
            with open(cache_path, 'rb') as fh:
                payload = pickle.load(fh)
            _GLOBAL_DATA_CACHE[cache_key] = {'images': payload['images'], 'labels': payload['labels']}

        entry = _GLOBAL_DATA_CACHE[cache_key]
        self.images_ref = entry['images']
        self.labels_ref = entry['labels']
        self.indices = np.arange(len(self.images_ref))

        wants_subset = split == 'train' and dataset_size != '3x'
        if wants_subset:
            self._downsample_indices(dataset_size)

    def _downsample_indices(self, target_size):
        """Keep a seeded random subset of the indices (20k for '1x', 40k for '2x')."""
        np.random.seed(self.seed)
        pool_size = len(self.indices)
        wanted = {'1x': 20000, '2x': 40000}.get(target_size)
        if not wanted or wanted >= pool_size:
            return
        keep = np.random.permutation(pool_size)[:wanted]
        self.indices = self.indices[keep]

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(image, binary_labels)`` for position ``idx`` of this view."""
        source_row = self.indices[idx]
        img = Image.fromarray(self.images_ref[source_row])
        if self.transform:
            img = self.transform(img)
        binary = (self.labels_ref[source_row] > 0).astype(np.int64)
        return img, torch.from_numpy(binary).long()

class MultiOutputViTBinary(nn.Module):
    """timm backbone followed by a head that emits 9 independent binary logits pairs.

    The backbone is created with ``num_classes=0``, so it returns pooled
    features; the head maps them to ``9 * 2`` values that ``forward``
    reshapes to ``(batch, 9, 2)``.
    """

    # Number of binary outputs and classes per output (head width is 9 * 2).
    NUM_OUTPUTS = 9
    NUM_CLASSES = 2

    def __init__(self, model_name, dropout=0.0, pretrained=True,
                 head_layers=1, head_hidden_dim=384, head_dropout=0.2):
        """Build the backbone and the classification head.

        Args:
            model_name: timm model identifier.
            dropout: ``drop_rate`` passed to the timm backbone.
            pretrained: Whether timm should load pretrained weights.
            head_layers: Number of linear layers in the head (1, 2 or 3).
            head_hidden_dim: Width of the hidden head layers.
            head_dropout: Dropout probability inside the head.

        Raises:
            ValueError: If ``head_layers`` is not 1, 2 or 3.
        """
        super().__init__()
        # Validate before the backbone is built: an unsupported value used to
        # leave ``self.head`` undefined and only failed later inside forward().
        if head_layers not in (1, 2, 3):
            raise ValueError(f"head_layers must be 1, 2 or 3, got {head_layers!r}")

        self.base_model = timm.create_model(model_name, pretrained=pretrained,
                                            drop_rate=dropout, num_classes=0)
        in_features = self.base_model.num_features
        out_features = self.NUM_OUTPUTS * self.NUM_CLASSES

        if head_layers == 1:
            # Kept as a bare Linear so parameter names stay ``head.weight`` / ``head.bias``.
            self.head = nn.Linear(in_features, out_features)
        else:
            # (head_layers - 1) hidden stages followed by the output layer;
            # module order matches the explicit 2- and 3-layer definitions.
            layers = []
            width = in_features
            for _ in range(int(head_layers) - 1):
                layers.extend([
                    nn.Linear(width, head_hidden_dim),
                    nn.BatchNorm1d(head_hidden_dim),
                    nn.SiLU(),
                    nn.Dropout(head_dropout),
                ])
                width = head_hidden_dim
            layers.append(nn.Linear(width, out_features))
            self.head = nn.Sequential(*layers)

    def forward(self, x):
        """Return logits reshaped to ``(-1, 9, 2)``."""
        features = self.base_model(x)
        logits = self.head(features)
        return logits.view(-1, self.NUM_OUTPUTS, self.NUM_CLASSES)

class EpochMonitorCallback:
    """Optuna callback that warns when many completed trials hit ``MAX_EPOCHS``.

    Once at least ``start_trial`` trials have completed, it prints a warning if
    the fraction of completed trials whose ``final_epoch`` user attribute is
    ``>= MAX_EPOCHS`` exceeds ``threshold``.
    """

    def __init__(self, start_trial=EPOCH_MONITOR_START, threshold=EPOCH_MONITOR_THRESHOLD):
        self.start_trial = start_trial
        self.threshold = threshold

    def __call__(self, study, trial):
        finished = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
        n_finished = len(finished)
        if n_finished < self.start_trial:
            return

        # Trials that never recorded 'final_epoch' count as epoch 0.
        hit_cap = [t for t in finished if t.user_attrs.get('final_epoch', 0) >= MAX_EPOCHS]
        if len(hit_cap) / n_finished > self.threshold:
            print(f"\n[WARNING] Many trials reached max epochs.")

def train_one_epoch(model, loader, criterion, optimizer, scheduler, device):
    """Run one training pass over ``loader`` and return the mean per-batch loss.

    Args:
        model: Module to train; switched to train mode.
        loader: Iterable of ``(images, labels)`` batches, wrapped in
            ``pl.MpDeviceLoader`` for ``device``.
        criterion: Loss called as ``criterion(outputs.view(-1, 2), labels.view(-1))``.
        optimizer: Optimizer stepped through ``xm.optimizer_step``.
        scheduler: Optional LR scheduler. Only a ``OneCycleLR`` instance is
            stepped here (once per batch); other schedulers are left to the caller.
        device: Device handed to ``pl.MpDeviceLoader``.

    Returns:
        float: Sum of per-batch losses divided by the number of batches that
        were processed, or ``0.0`` when the loader yields nothing.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    # TPU: Use Parallel Loader
    para_loader = pl.MpDeviceLoader(loader, device)

    # isinstance(None, ...) is False, so this also covers "no scheduler".
    step_per_batch = isinstance(scheduler, optim.lr_scheduler.OneCycleLR)

    for images, labels in tqdm(para_loader, desc="Train", leave=False):
        optimizer.zero_grad(set_to_none=True)
        outputs = model(images)
        loss = criterion(outputs.view(-1, 2), labels.view(-1))
        loss.backward()
        xm.optimizer_step(optimizer) # Marks step implicitly

        if step_per_batch:
            scheduler.step()
        running_loss += loss.item()
        num_batches += 1

        # Periodic sync to prevent graph explosion (every 100 batches)
        if HAS_XLA and num_batches % 100 == 0:
            torch_xla.sync()

    # Average over the batches actually seen. The original divided by
    # len(loader), which raised ZeroDivisionError for an empty loader; 0.0
    # mirrors what validate() returns in the same situation.
    if num_batches == 0:
        return 0.0
    return running_loss / num_batches

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader``.

    Returns a tuple ``(mean_loss, best_f1, best_threshold)``. The F1 score is
    computed over all flattened outputs, and the threshold is chosen from
    ``np.arange(0.1, 0.95, 0.05)`` as the first one reaching the highest F1.
    When the loader yields no batches the result is ``(0.0, 0.0, 0.0)``.
    """
    model.eval()
    loss_total = 0.0
    prob_chunks, target_chunks = [], []

    device_loader = pl.MpDeviceLoader(loader, device)

    with torch.no_grad():
        for images, labels in tqdm(device_loader, desc="Val", leave=False):
            logits = model(images)
            batch_loss = criterion(logits.view(-1, 2), labels.view(-1))
            loss_total += batch_loss.item()

            # Probability of the positive class for every output.
            positive_prob = torch.softmax(logits, dim=2)[:, :, 1]
            prob_chunks.append(positive_prob.cpu().numpy())
            target_chunks.append(labels.cpu().numpy())

            # [CRITICAL FIX] Sync to free TPU graph!
            if HAS_XLA:
                torch_xla.sync()

    if len(prob_chunks) == 0:
        return 0.0, 0.0, 0.0

    flat_probs = np.concatenate(prob_chunks).flatten()
    flat_targets = np.concatenate(target_chunks).flatten()

    # Threshold sweep: keep the first threshold with a strictly better F1.
    top_f1, top_th = 0.0, 0.5
    for candidate in np.arange(0.1, 0.95, 0.05):
        hard_preds = (flat_probs > candidate).astype(int)
        score = f1_score(flat_targets, hard_preds, zero_division=0)
        if score > top_f1:
            top_f1, top_th = score, candidate

    mean_loss = loss_total / len(loader)
    return mean_loss, top_f1, top_th

def objective(trial, device, seed):
    """Optuna objective: train one sampled configuration and return its best validation F1.

    Hyper-parameters are drawn from ``trial`` (the order of the ``suggest_*``
    calls is kept fixed), a ``MultiOutputViTBinary`` model is built and trained
    for up to ``MAX_EPOCHS`` epochs with early stopping after ``PATIENCE``
    epochs without improvement. Each epoch's F1 is reported to ``trial`` so the
    pruner can stop the trial.

    Args:
        trial: Optuna trial used for sampling, reporting and pruning.
        device: Device the model is moved to and batches are loaded on.
        seed: Seed forwarded to ``CachedBinaryDataset``.

    Returns:
        float: Highest validation F1 observed during the trial.

    Raises:
        optuna.TrialPruned: When the pruner stops the trial, or when dataset
            construction raises ``FileNotFoundError``.
    """
    # --- Params ---
    dataset_size = trial.suggest_categorical('dataset_size', ['1x', '2x', '3x'])
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [64, 128])
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'AdamW', 'SGD'])
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
    dropout = trial.suggest_float('dropout', 0.1, 0.4)
    freeze_ratio = trial.suggest_float('freeze_ratio', 0.0, 1.0)
    augment_magnitude = trial.suggest_int('augment_magnitude', 5, 15)
    scheduler_type = trial.suggest_categorical('scheduler', ['cosine', 'onecycle'])
    head_layers = trial.suggest_int('head_layers', 1, 3)
    head_hidden_dim = trial.suggest_categorical('head_hidden_dim', [256, 384, 512, 768])
    head_dropout = trial.suggest_float('head_dropout', 0.1, 0.5)

    print(f"\nTrial {trial.number}: size={dataset_size}, lr={lr:.2e}, batch={batch_size}, "
          f"mag={augment_magnitude}, freeze={freeze_ratio:.2f}")

    # --- Model ---
    model = MultiOutputViTBinary('vit_small_patch16_224', dropout=dropout, pretrained=True,
                                 head_layers=head_layers, head_hidden_dim=head_hidden_dim,
                                 head_dropout=head_dropout).to(device)
    freeze_vit_layers(model, freeze_ratio)
    criterion = nn.CrossEntropyLoss()

    if optimizer_name == 'Adam': optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif optimizer_name == 'AdamW': optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    else: optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)

    try:
        train_ds = CachedBinaryDataset('train', RESOLUTION, get_transforms(True, augment_magnitude), dataset_size, seed)
        val_bal_ds = CachedBinaryDataset('val_balanced', RESOLUTION, get_transforms(False), seed=seed)
    except FileNotFoundError: raise optuna.TrialPruned()

    # [CRITICAL FIX] persistent_workers=False to prevent zombie processes
    # [FIX] pin_memory=False for TPU (not used and causes warnings)
    num_workers = 2  # Reduced for stability
    train_loader = DataLoader(train_ds, batch_size, True, num_workers=num_workers,
                              persistent_workers=False, pin_memory=False, drop_last=True)
    val_bal_loader = DataLoader(val_bal_ds, batch_size*2, False, num_workers=num_workers,
                                persistent_workers=False, pin_memory=False)

    if scheduler_type == 'cosine':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=MAX_EPOCHS, eta_min=1e-6)
    else:
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr, epochs=MAX_EPOCHS, steps_per_epoch=len(train_loader))

    best_val_f1 = 0.0
    patience_counter = 0
    final_epoch = 0  # tracked explicitly so it is defined even if the loop body never runs

    try:
        for epoch in range(MAX_EPOCHS):
            final_epoch = epoch + 1
            train_loss = train_one_epoch(model, train_loader, criterion, optimizer, scheduler, device)
            # OneCycleLR is stepped per batch inside train_one_epoch; every other scheduler steps per epoch.
            if scheduler and not isinstance(scheduler, optim.lr_scheduler.OneCycleLR): scheduler.step()

            _, val_f1, val_th = validate(model, val_bal_loader, criterion, device)

            print(f"Epoch {epoch+1}: Loss:{train_loss:.4f} | F1:{val_f1:.4f}(@{val_th:.2f})")

            trial.report(val_f1, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()

            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= PATIENCE:
                print(f"  Early stopping at epoch {epoch+1}")
                break

        trial.set_user_attr('final_epoch', final_epoch)
    finally:
        # Clean up. Running this in `finally` releases the model and loaders
        # for pruned or failed trials too; previously those paths skipped the
        # explicit cleanup entirely.
        del model, optimizer, scheduler, train_loader, val_bal_loader
        if HAS_XLA:
            torch_xla.sync()
        gc.collect()
        gc.collect()  # Double GC for TPU memory

    return best_val_f1

def main(argv=None):
    """Entry point: prepare data, open (or create) the Optuna study and run the remaining trials.

    Args:
        argv: Optional list of command-line arguments. ``None`` makes argparse
            read ``sys.argv[1:]``.

    Side effects:
        Creates ``<base_dir>/output/progress/binary_vit_v5`` and a SQLite study
        database inside it. When at least one trial has completed, writes
        ``<base_dir>/output/optuna_results_<org>_vit_v5_final.json``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--org', type=str, required=True)
    parser.add_argument('--n_trials', type=int, default=100)
    parser.add_argument('--study_name', type=str, default=None)
    parser.add_argument('--base_dir', type=str, default='/content/drive/MyDrive/XRAIN/yano/20250601~20251020_dataset/')
    parser.add_argument('--seed', type=int, default=42)

    # parse_args(None) falls back to sys.argv[1:], so one call covers both cases.
    args = parser.parse_args(argv)

    set_seed(args.seed)
    study_name = args.study_name or f'vit_small_{args.org}_v5_binary_fix'
    print(f"Study: {study_name}, Res: {RESOLUTION}px (TPU)")

    # 1. Prepare Data (Resize here!)
    prepare_all_resolutions(args.org, args.base_dir, seed=args.seed)

    # 2. Load to Memory
    preload_data_to_memory(RESOLUTION)

    # 3. Setup TPU
    # NOTE(review): the recorded run output shows a warning attributed to this
    # line; check whether the installed torch_xla recommends a newer device API.
    device = xm.xla_device()
    print(f"Using Device: {device}")

    # 4. Optuna
    output_dir = Path(args.base_dir) / 'output'
    progress_dir = output_dir / 'progress' / 'binary_vit_v5'
    progress_dir.mkdir(parents=True, exist_ok=True)  # also creates output_dir
    db_path = progress_dir / f"{study_name}.db"
    storage_url = f"sqlite:///{db_path}"

    print(f"DB Path: {db_path}")
    print(f"DB exists: {db_path.exists()}")

    def make_pruner():
        # Single source of truth so the recovery path below prunes exactly
        # like the primary path (it previously used MedianPruner() defaults).
        return MedianPruner(n_startup_trials=5, n_warmup_steps=10)

    try:
        study = optuna.create_study(study_name=study_name, storage=storage_url, load_if_exists=True,
                                    direction='maximize', sampler=TPESampler(seed=args.seed),
                                    pruner=make_pruner())
        print(f"Loaded existing study with {len(study.trials)} trials")
    except Exception as e:
        # Best-effort recovery: set the unreadable DB aside and start over.
        print(f"Failed to load study: {e}")
        if db_path.exists():
            import shutil  # local import: only needed on this recovery path
            backup_path = db_path.with_suffix('.db.bak')
            print(f"Backing up corrupted DB to {backup_path}")
            shutil.move(str(db_path), str(backup_path))
        study = optuna.create_study(study_name=study_name, storage=storage_url, direction='maximize',
                                    sampler=TPESampler(seed=args.seed), pruner=make_pruner())
        print(f"Created new study")

    remaining = args.n_trials - len(study.trials)
    if remaining > 0:
        study.optimize(lambda t: objective(t, device, args.seed), n_trials=remaining,
                       callbacks=[EpochMonitorCallback()], gc_after_trial=True)

    # study.best_value raises ValueError when no trial has completed (e.g. all
    # pruned or failed), so look for completed trials rather than any trial.
    has_completed = any(t.state == optuna.trial.TrialState.COMPLETE for t in study.trials)
    if has_completed:
        print(f"Best F1: {study.best_value:.4f}")
        print(f"Best params: {study.best_params}")
        with open(output_dir / f'optuna_results_{args.org}_vit_v5_final.json', 'w') as f:
            json.dump({'best_f1': study.best_value, 'params': study.best_params}, f, indent=2)
    elif len(study.trials) > 0:
        print("No completed trials yet; skipping best-result export.")

if __name__ == '__main__':
    # Inside a notebook kernel ('ipykernel' is loaded) pass explicit arguments;
    # otherwise hand over None so main() reads the real command line.
    notebook_argv = ['--org', 'JMA', '--seed', '42'] if 'ipykernel' in sys.modules else None
    main(notebook_argv)

Global seed set to: 42
Study: vit_small_JMA_v5_binary_fix, Res: 224px (TPU)

    [Found] Drive cache for train. Copying...
    [Found] Drive cache for val_balanced. Copying...



[train] Loading /content/temp_dataset_cache_vit/train_224_vit.pkl ...
[train] Loaded. Shape: (60000, 224, 224, 3)
[val_balanced] Loading /content/temp_dataset_cache_vit/val_balanced_224_vit.pkl ...
[val_balanced] Loaded. Shape: (20000, 224, 224, 3)

Using Device: xla:0


  device = xm.xla_device()


DB Path: /content/drive/MyDrive/XRAIN/yano/20250601~20251020_dataset/output/progress/binary_vit_v5/vit_small_JMA_v5_binary_fix.db
DB exists: True


[I 2025-12-01 06:11:23,513] Using an existing study with name 'vit_small_JMA_v5_binary_fix' instead of creating a new one.


Loaded existing study with 48 trials

Trial 48: size=3x, lr=1.80e-04, batch=64, mag=15, freeze=0.01


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/88.2M [00:00<?, ?B/s]



Epoch 1: Loss:0.4183 | F1:0.7722(@0.30)




Epoch 2: Loss:0.3922 | F1:0.7846(@0.55)




Epoch 3: Loss:0.3820 | F1:0.7833(@0.30)




Epoch 4: Loss:0.3767 | F1:0.7879(@0.40)




Epoch 5: Loss:0.3723 | F1:0.7884(@0.35)




Epoch 6: Loss:0.3699 | F1:0.7894(@0.45)




Epoch 7: Loss:0.3659 | F1:0.7855(@0.35)




Epoch 8: Loss:0.3629 | F1:0.7853(@0.45)




Epoch 9: Loss:0.3620 | F1:0.7924(@0.30)




Epoch 10: Loss:0.3593 | F1:0.7906(@0.45)




Epoch 11: Loss:0.3575 | F1:0.7900(@0.60)




Epoch 12: Loss:0.3557 | F1:0.7933(@0.30)




Epoch 13: Loss:0.3535 | F1:0.7933(@0.40)




Epoch 14: Loss:0.3525 | F1:0.7941(@0.35)




Epoch 15: Loss:0.3509 | F1:0.7938(@0.40)




Epoch 16: Loss:0.3494 | F1:0.7936(@0.50)




Epoch 17: Loss:0.3469 | F1:0.7848(@0.35)




Epoch 18: Loss:0.3462 | F1:0.7920(@0.35)




Epoch 19: Loss:0.3450 | F1:0.7906(@0.45)
  Early stopping at epoch 19


[I 2025-12-01 06:41:54,126] Trial 48 finished with value: 0.7941270574851577 and parameters: {'dataset_size': '3x', 'lr': 0.00017992070302280062, 'batch_size': 64, 'optimizer': 'Adam', 'weight_decay': 0.000751299389868149, 'dropout': 0.20732230517057945, 'freeze_ratio': 0.010121219136082965, 'augment_magnitude': 15, 'scheduler': 'cosine', 'head_layers': 2, 'head_hidden_dim': 384, 'head_dropout': 0.27438050048137524}. Best is trial 44 with value: 0.7970972008722697.



Trial 49: size=3x, lr=9.29e-05, batch=128, mag=6, freeze=0.12
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4305 | F1:0.7828(@0.45)




Epoch 2: Loss:0.3795 | F1:0.7880(@0.50)




Epoch 3: Loss:0.3643 | F1:0.7907(@0.40)




Epoch 4: Loss:0.3549 | F1:0.7921(@0.35)




Epoch 5: Loss:0.3469 | F1:0.7932(@0.40)




Epoch 6: Loss:0.3396 | F1:0.7882(@0.40)




Epoch 7: Loss:0.3324 | F1:0.7935(@0.50)




Epoch 8: Loss:0.3265 | F1:0.7941(@0.40)




Epoch 9: Loss:0.3197 | F1:0.7940(@0.40)




Epoch 10: Loss:0.3142 | F1:0.7911(@0.35)




Epoch 11: Loss:0.3084 | F1:0.7936(@0.35)




Epoch 12: Loss:0.3030 | F1:0.7932(@0.35)




Epoch 13: Loss:0.2970 | F1:0.7921(@0.30)
  Early stopping at epoch 13


[I 2025-12-01 06:54:15,213] Trial 49 finished with value: 0.7940863661015517 and parameters: {'dataset_size': '3x', 'lr': 9.289506909823928e-05, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 0.0004325328706425506, 'dropout': 0.3069403752886583, 'freeze_ratio': 0.1160479629046232, 'augment_magnitude': 6, 'scheduler': 'cosine', 'head_layers': 3, 'head_hidden_dim': 384, 'head_dropout': 0.24626644678589957}. Best is trial 44 with value: 0.7970972008722697.



Trial 50: size=2x, lr=1.27e-05, batch=64, mag=14, freeze=1.00
  Freezing 11/12 Transformer blocks




Epoch 1: Loss:1.0713 | F1:0.5164(@0.10)




Epoch 2: Loss:1.0310 | F1:0.5168(@0.10)




Epoch 3: Loss:1.0306 | F1:0.5169(@0.10)




Epoch 4: Loss:1.0279 | F1:0.5169(@0.10)




Epoch 5: Loss:1.0285 | F1:0.5169(@0.10)




Epoch 6: Loss:1.0268 | F1:0.5168(@0.10)




Epoch 7: Loss:1.0295 | F1:0.5168(@0.10)




Epoch 8: Loss:1.0286 | F1:0.5168(@0.10)




Epoch 9: Loss:1.0277 | F1:0.5169(@0.10)




Epoch 10: Loss:1.0292 | F1:0.5170(@0.10)


[I 2025-12-01 07:01:35,714] Trial 50 pruned. 


Epoch 11: Loss:1.0292 | F1:0.5169(@0.10)

Trial 51: size=1x, lr=4.92e-04, batch=128, mag=9, freeze=0.56
  Freezing 6/12 Transformer blocks




Epoch 1: Loss:0.4874 | F1:0.7609(@0.40)




Epoch 2: Loss:0.4103 | F1:0.7668(@0.35)




Epoch 3: Loss:0.3909 | F1:0.7744(@0.40)




Epoch 4: Loss:0.3902 | F1:0.7727(@0.45)




Epoch 5: Loss:0.3767 | F1:0.7722(@0.45)




Epoch 6: Loss:0.3701 | F1:0.7730(@0.30)




Epoch 7: Loss:0.3624 | F1:0.7751(@0.35)




Epoch 8: Loss:0.3562 | F1:0.7752(@0.45)




Epoch 9: Loss:0.3456 | F1:0.7771(@0.30)




Epoch 10: Loss:0.3401 | F1:0.7762(@0.50)


[I 2025-12-01 07:06:23,610] Trial 51 pruned. 


Epoch 11: Loss:0.3369 | F1:0.7745(@0.40)

Trial 52: size=3x, lr=2.70e-04, batch=128, mag=13, freeze=0.16
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.6925 | F1:0.5900(@0.50)




Epoch 2: Loss:0.6305 | F1:0.6839(@0.50)




Epoch 3: Loss:0.5889 | F1:0.7157(@0.55)




Epoch 4: Loss:0.5403 | F1:0.7360(@0.50)




Epoch 5: Loss:0.4987 | F1:0.7473(@0.45)




Epoch 6: Loss:0.4721 | F1:0.7571(@0.50)




Epoch 7: Loss:0.4514 | F1:0.7642(@0.45)




Epoch 8: Loss:0.4341 | F1:0.7707(@0.55)




Epoch 9: Loss:0.4201 | F1:0.7746(@0.60)




Epoch 10: Loss:0.4086 | F1:0.7777(@0.40)


[I 2025-12-01 07:17:09,641] Trial 52 pruned. 


Epoch 11: Loss:0.4002 | F1:0.7768(@0.30)

Trial 53: size=3x, lr=3.62e-04, batch=128, mag=15, freeze=0.06




Epoch 1: Loss:0.6865 | F1:0.6840(@0.55)




Epoch 2: Loss:0.6090 | F1:0.7206(@0.50)




Epoch 3: Loss:0.5691 | F1:0.7418(@0.50)




Epoch 4: Loss:0.5360 | F1:0.7533(@0.55)




Epoch 5: Loss:0.5082 | F1:0.7623(@0.50)




Epoch 6: Loss:0.4800 | F1:0.7694(@0.45)




Epoch 7: Loss:0.4564 | F1:0.7762(@0.50)




Epoch 8: Loss:0.4413 | F1:0.7803(@0.50)




Epoch 9: Loss:0.4284 | F1:0.7847(@0.55)




Epoch 10: Loss:0.4151 | F1:0.7867(@0.35)


[I 2025-12-01 07:28:25,335] Trial 53 pruned. 


Epoch 11: Loss:0.4037 | F1:0.7889(@0.60)

Trial 54: size=3x, lr=1.48e-04, batch=128, mag=13, freeze=0.21
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.4139 | F1:0.7790(@0.40)




Epoch 2: Loss:0.3791 | F1:0.7852(@0.40)




Epoch 3: Loss:0.3669 | F1:0.7891(@0.50)




Epoch 4: Loss:0.3589 | F1:0.7905(@0.35)




Epoch 5: Loss:0.3526 | F1:0.7913(@0.35)




Epoch 6: Loss:0.3473 | F1:0.7933(@0.40)




Epoch 7: Loss:0.3402 | F1:0.7918(@0.40)




Epoch 8: Loss:0.3378 | F1:0.7893(@0.45)




Epoch 9: Loss:0.3329 | F1:0.7921(@0.40)




Epoch 10: Loss:0.3297 | F1:0.7949(@0.45)




Epoch 11: Loss:0.3246 | F1:0.7915(@0.35)




Epoch 12: Loss:0.3215 | F1:0.7934(@0.30)




Epoch 13: Loss:0.3161 | F1:0.7928(@0.40)




Epoch 14: Loss:0.3135 | F1:0.7908(@0.50)




Epoch 15: Loss:0.3101 | F1:0.7942(@0.45)
  Early stopping at epoch 15


[I 2025-12-01 07:41:48,149] Trial 54 finished with value: 0.7948909439888326 and parameters: {'dataset_size': '3x', 'lr': 0.00014824905467453555, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 0.0005805106825377609, 'dropout': 0.2238379470330669, 'freeze_ratio': 0.2147915147114171, 'augment_magnitude': 13, 'scheduler': 'cosine', 'head_layers': 2, 'head_hidden_dim': 768, 'head_dropout': 0.2836991953509912}. Best is trial 44 with value: 0.7970972008722697.



Trial 55: size=3x, lr=1.46e-04, batch=128, mag=14, freeze=0.73
  Freezing 8/12 Transformer blocks




Epoch 1: Loss:0.4446 | F1:0.7659(@0.45)




Epoch 2: Loss:0.4075 | F1:0.7717(@0.35)




Epoch 3: Loss:0.3981 | F1:0.7761(@0.50)




Epoch 4: Loss:0.3916 | F1:0.7699(@0.35)




Epoch 5: Loss:0.3873 | F1:0.7782(@0.40)




Epoch 6: Loss:0.3830 | F1:0.7809(@0.45)




Epoch 7: Loss:0.3794 | F1:0.7770(@0.35)




Epoch 8: Loss:0.3783 | F1:0.7757(@0.35)




Epoch 9: Loss:0.3756 | F1:0.7807(@0.45)




Epoch 10: Loss:0.3730 | F1:0.7828(@0.35)


[I 2025-12-01 07:51:52,835] Trial 55 pruned. 


Epoch 11: Loss:0.3717 | F1:0.7812(@0.40)

Trial 56: size=3x, lr=1.08e-04, batch=128, mag=12, freeze=0.22
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.4305 | F1:0.7755(@0.55)




Epoch 2: Loss:0.3846 | F1:0.7860(@0.45)




Epoch 3: Loss:0.3724 | F1:0.7832(@0.45)




Epoch 4: Loss:0.3628 | F1:0.7885(@0.50)




Epoch 5: Loss:0.3572 | F1:0.7918(@0.50)




Epoch 6: Loss:0.3512 | F1:0.7938(@0.35)




Epoch 7: Loss:0.3469 | F1:0.7929(@0.40)




Epoch 8: Loss:0.3435 | F1:0.7947(@0.30)




Epoch 9: Loss:0.3391 | F1:0.7938(@0.40)




Epoch 10: Loss:0.3334 | F1:0.7938(@0.35)




Epoch 11: Loss:0.3297 | F1:0.7934(@0.40)




Epoch 12: Loss:0.3261 | F1:0.7884(@0.25)




Epoch 13: Loss:0.3224 | F1:0.7934(@0.50)
  Early stopping at epoch 13


[I 2025-12-01 08:03:45,432] Trial 56 finished with value: 0.7946556924204414 and parameters: {'dataset_size': '3x', 'lr': 0.00010822570389889164, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 0.0007193657724403033, 'dropout': 0.20325999260224967, 'freeze_ratio': 0.21755161575603252, 'augment_magnitude': 12, 'scheduler': 'cosine', 'head_layers': 2, 'head_hidden_dim': 384, 'head_dropout': 0.2879259337101082}. Best is trial 44 with value: 0.7970972008722697.



Trial 57: size=3x, lr=2.07e-04, batch=128, mag=15, freeze=0.36
  Freezing 4/12 Transformer blocks




Epoch 1: Loss:0.4157 | F1:0.7754(@0.50)




Epoch 2: Loss:0.3868 | F1:0.7806(@0.30)




Epoch 3: Loss:0.3754 | F1:0.7842(@0.40)




Epoch 4: Loss:0.3695 | F1:0.7870(@0.55)




Epoch 5: Loss:0.3626 | F1:0.7875(@0.40)




Epoch 6: Loss:0.3586 | F1:0.7905(@0.45)




Epoch 7: Loss:0.3532 | F1:0.7907(@0.30)




Epoch 8: Loss:0.3506 | F1:0.7915(@0.40)




Epoch 9: Loss:0.3476 | F1:0.7904(@0.45)




Epoch 10: Loss:0.3437 | F1:0.7912(@0.50)




Epoch 11: Loss:0.3413 | F1:0.7921(@0.40)




Epoch 12: Loss:0.3383 | F1:0.7918(@0.35)




Epoch 13: Loss:0.3345 | F1:0.7918(@0.45)




Epoch 14: Loss:0.3311 | F1:0.7923(@0.30)




Epoch 15: Loss:0.3280 | F1:0.7938(@0.40)




Epoch 16: Loss:0.3269 | F1:0.7905(@0.40)




Epoch 17: Loss:0.3226 | F1:0.7897(@0.45)




Epoch 18: Loss:0.3204 | F1:0.7908(@0.50)




Epoch 19: Loss:0.3169 | F1:0.7885(@0.35)




Epoch 20: Loss:0.3162 | F1:0.7919(@0.40)
  Early stopping at epoch 20


[I 2025-12-01 08:21:02,326] Trial 57 finished with value: 0.7937810103743798 and parameters: {'dataset_size': '3x', 'lr': 0.00020703265291534308, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 0.00042715404793181467, 'dropout': 0.18230466473445436, 'freeze_ratio': 0.3626204702023617, 'augment_magnitude': 15, 'scheduler': 'cosine', 'head_layers': 2, 'head_hidden_dim': 768, 'head_dropout': 0.2524924553918848}. Best is trial 44 with value: 0.7970972008722697.



Trial 58: size=3x, lr=9.11e-05, batch=64, mag=13, freeze=0.27
  Freezing 3/12 Transformer blocks




Epoch 1: Loss:0.4253 | F1:0.7778(@0.40)




Epoch 2: Loss:0.3873 | F1:0.7824(@0.50)




Epoch 3: Loss:0.3756 | F1:0.7877(@0.45)




Epoch 4: Loss:0.3686 | F1:0.7891(@0.50)




Epoch 5: Loss:0.3599 | F1:0.7913(@0.40)




Epoch 6: Loss:0.3554 | F1:0.7905(@0.35)




Epoch 7: Loss:0.3502 | F1:0.7914(@0.50)




Epoch 8: Loss:0.3467 | F1:0.7911(@0.40)




Epoch 9: Loss:0.3429 | F1:0.7940(@0.45)




Epoch 10: Loss:0.3398 | F1:0.7941(@0.40)




Epoch 11: Loss:0.3370 | F1:0.7939(@0.40)




Epoch 12: Loss:0.3343 | F1:0.7932(@0.40)




Epoch 13: Loss:0.3314 | F1:0.7916(@0.45)




Epoch 14: Loss:0.3276 | F1:0.7908(@0.50)




Epoch 15: Loss:0.3264 | F1:0.7942(@0.45)




Epoch 16: Loss:0.3220 | F1:0.7931(@0.45)




Epoch 17: Loss:0.3202 | F1:0.7923(@0.40)




Epoch 18: Loss:0.3193 | F1:0.7947(@0.40)




Epoch 19: Loss:0.3155 | F1:0.7916(@0.45)




Epoch 20: Loss:0.3145 | F1:0.7913(@0.45)




Epoch 21: Loss:0.3135 | F1:0.7926(@0.45)




Epoch 22: Loss:0.3114 | F1:0.7875(@0.35)




Epoch 23: Loss:0.3099 | F1:0.7935(@0.40)
  Early stopping at epoch 23


[I 2025-12-01 08:54:09,926] Trial 58 finished with value: 0.7947417263827738 and parameters: {'dataset_size': '3x', 'lr': 9.114061306401837e-05, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 0.0005515584003541748, 'dropout': 0.24046542117374695, 'freeze_ratio': 0.270747939164664, 'augment_magnitude': 13, 'scheduler': 'cosine', 'head_layers': 2, 'head_hidden_dim': 768, 'head_dropout': 0.28479728485074185}. Best is trial 44 with value: 0.7970972008722697.



Trial 59: size=3x, lr=1.58e-04, batch=128, mag=12, freeze=0.31
  Freezing 3/12 Transformer blocks




Epoch 1: Loss:0.4310 | F1:0.7720(@0.35)




Epoch 2: Loss:0.3882 | F1:0.7841(@0.40)




Epoch 3: Loss:0.3743 | F1:0.7841(@0.35)




Epoch 4: Loss:0.3655 | F1:0.7882(@0.40)




Epoch 5: Loss:0.3570 | F1:0.7890(@0.35)




Epoch 6: Loss:0.3506 | F1:0.7934(@0.35)




Epoch 7: Loss:0.3466 | F1:0.7918(@0.40)




Epoch 8: Loss:0.3398 | F1:0.7920(@0.40)




Epoch 9: Loss:0.3330 | F1:0.7954(@0.45)




Epoch 10: Loss:0.3280 | F1:0.7949(@0.40)




Epoch 11: Loss:0.3219 | F1:0.7939(@0.35)




Epoch 12: Loss:0.3177 | F1:0.7938(@0.40)




Epoch 13: Loss:0.3143 | F1:0.7927(@0.45)




Epoch 14: Loss:0.3064 | F1:0.7908(@0.35)
  Early stopping at epoch 14


[I 2025-12-01 09:06:35,470] Trial 59 finished with value: 0.7953673908623747 and parameters: {'dataset_size': '3x', 'lr': 0.00015780174261676789, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 4.0201018094250074e-05, 'dropout': 0.34877063584482965, 'freeze_ratio': 0.31104043419876626, 'augment_magnitude': 12, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.17600377542429704}. Best is trial 44 with value: 0.7970972008722697.



Trial 60: size=1x, lr=4.67e-05, batch=64, mag=12, freeze=0.29
  Freezing 3/12 Transformer blocks




Epoch 1: Loss:0.5808 | F1:0.7366(@0.40)




Epoch 2: Loss:0.4529 | F1:0.7516(@0.35)




Epoch 3: Loss:0.4290 | F1:0.7589(@0.40)




Epoch 4: Loss:0.4169 | F1:0.7627(@0.40)




Epoch 5: Loss:0.4105 | F1:0.7647(@0.35)




Epoch 6: Loss:0.4029 | F1:0.7672(@0.40)




Epoch 7: Loss:0.4012 | F1:0.7679(@0.45)




Epoch 8: Loss:0.3977 | F1:0.7694(@0.35)




Epoch 9: Loss:0.3956 | F1:0.7721(@0.40)




Epoch 10: Loss:0.3913 | F1:0.7719(@0.45)


[I 2025-12-01 09:13:20,871] Trial 60 pruned. 


Epoch 11: Loss:0.3891 | F1:0.7717(@0.35)

Trial 61: size=3x, lr=2.25e-04, batch=128, mag=14, freeze=0.13
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4247 | F1:0.7779(@0.40)




Epoch 2: Loss:0.3856 | F1:0.7847(@0.40)




Epoch 3: Loss:0.3705 | F1:0.7855(@0.40)




Epoch 4: Loss:0.3601 | F1:0.7920(@0.40)




Epoch 5: Loss:0.3525 | F1:0.7889(@0.50)




Epoch 6: Loss:0.3446 | F1:0.7892(@0.40)




Epoch 7: Loss:0.3373 | F1:0.7942(@0.40)




Epoch 8: Loss:0.3305 | F1:0.7933(@0.30)




Epoch 9: Loss:0.3249 | F1:0.7931(@0.40)




Epoch 10: Loss:0.3190 | F1:0.7929(@0.35)




Epoch 11: Loss:0.3121 | F1:0.7950(@0.35)




Epoch 12: Loss:0.3050 | F1:0.7920(@0.25)




Epoch 13: Loss:0.2966 | F1:0.7926(@0.40)




Epoch 14: Loss:0.2914 | F1:0.7924(@0.40)




Epoch 15: Loss:0.2829 | F1:0.7884(@0.25)




Epoch 16: Loss:0.2765 | F1:0.7899(@0.35)
  Early stopping at epoch 16


[I 2025-12-01 09:28:39,770] Trial 61 finished with value: 0.7949539361996832 and parameters: {'dataset_size': '3x', 'lr': 0.00022481429466114216, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 7.210779762742198e-05, 'dropout': 0.3212013535683023, 'freeze_ratio': 0.1283175370499127, 'augment_magnitude': 14, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.177760570721783}. Best is trial 44 with value: 0.7970972008722697.



Trial 62: size=3x, lr=1.95e-04, batch=128, mag=14, freeze=0.11
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4281 | F1:0.7740(@0.35)




Epoch 2: Loss:0.3853 | F1:0.7863(@0.35)




Epoch 3: Loss:0.3707 | F1:0.7875(@0.40)




Epoch 4: Loss:0.3605 | F1:0.7922(@0.40)




Epoch 5: Loss:0.3526 | F1:0.7930(@0.35)




Epoch 6: Loss:0.3453 | F1:0.7934(@0.40)




Epoch 7: Loss:0.3378 | F1:0.7913(@0.50)




Epoch 8: Loss:0.3306 | F1:0.7920(@0.35)




Epoch 9: Loss:0.3235 | F1:0.7922(@0.50)




Epoch 10: Loss:0.3170 | F1:0.7928(@0.30)




Epoch 11: Loss:0.3096 | F1:0.7906(@0.50)
  Early stopping at epoch 11


[I 2025-12-01 09:38:38,263] Trial 62 finished with value: 0.7934265068339682 and parameters: {'dataset_size': '3x', 'lr': 0.00019477406357074127, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 6.780023071352468e-05, 'dropout': 0.36684590571332915, 'freeze_ratio': 0.11154989823451389, 'augment_magnitude': 14, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.17497307670838083}. Best is trial 44 with value: 0.7970972008722697.



Trial 63: size=3x, lr=3.55e-04, batch=128, mag=14, freeze=0.16
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4509 | F1:0.7662(@0.35)




Epoch 2: Loss:0.4039 | F1:0.7725(@0.45)




Epoch 3: Loss:0.3910 | F1:0.7767(@0.40)




Epoch 4: Loss:0.3834 | F1:0.7834(@0.60)




Epoch 5: Loss:0.3777 | F1:0.7843(@0.40)




Epoch 6: Loss:0.3723 | F1:0.7839(@0.30)




Epoch 7: Loss:0.3699 | F1:0.7860(@0.30)




Epoch 8: Loss:0.3629 | F1:0.7881(@0.35)




Epoch 9: Loss:0.3586 | F1:0.7868(@0.40)




Epoch 10: Loss:0.3539 | F1:0.7882(@0.30)


[I 2025-12-01 09:48:33,667] Trial 63 pruned. 


Epoch 11: Loss:0.3472 | F1:0.7854(@0.40)

Trial 64: size=3x, lr=2.36e-04, batch=128, mag=15, freeze=0.04




Epoch 1: Loss:0.4292 | F1:0.7719(@0.45)




Epoch 2: Loss:0.3892 | F1:0.7819(@0.40)




Epoch 3: Loss:0.3775 | F1:0.7896(@0.35)




Epoch 4: Loss:0.3692 | F1:0.7880(@0.30)




Epoch 5: Loss:0.3611 | F1:0.7893(@0.35)




Epoch 6: Loss:0.3552 | F1:0.7941(@0.40)




Epoch 7: Loss:0.3485 | F1:0.7938(@0.35)




Epoch 8: Loss:0.3433 | F1:0.7919(@0.35)




Epoch 9: Loss:0.3416 | F1:0.7908(@0.35)




Epoch 10: Loss:0.3357 | F1:0.7917(@0.30)




Epoch 11: Loss:0.3287 | F1:0.7925(@0.30)
  Early stopping at epoch 11


[I 2025-12-01 09:59:34,407] Trial 64 finished with value: 0.7940630535891989 and parameters: {'dataset_size': '3x', 'lr': 0.00023579490212235232, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 8.805911480853238e-05, 'dropout': 0.35516011736872743, 'freeze_ratio': 0.03752367911896243, 'augment_magnitude': 15, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.18764173505888052}. Best is trial 44 with value: 0.7970972008722697.



Trial 65: size=3x, lr=1.62e-04, batch=128, mag=14, freeze=0.49
  Freezing 5/12 Transformer blocks




Epoch 1: Loss:0.4333 | F1:0.7740(@0.45)




Epoch 2: Loss:0.3925 | F1:0.7811(@0.40)




Epoch 3: Loss:0.3812 | F1:0.7835(@0.30)




Epoch 4: Loss:0.3717 | F1:0.7838(@0.35)




Epoch 5: Loss:0.3650 | F1:0.7881(@0.50)




Epoch 6: Loss:0.3596 | F1:0.7896(@0.35)




Epoch 7: Loss:0.3535 | F1:0.7902(@0.40)




Epoch 8: Loss:0.3480 | F1:0.7900(@0.40)




Epoch 9: Loss:0.3431 | F1:0.7900(@0.45)




Epoch 10: Loss:0.3383 | F1:0.7912(@0.35)




Epoch 11: Loss:0.3339 | F1:0.7903(@0.40)




Epoch 12: Loss:0.3290 | F1:0.7899(@0.45)




Epoch 13: Loss:0.3251 | F1:0.7899(@0.30)




Epoch 14: Loss:0.3207 | F1:0.7905(@0.45)




Epoch 15: Loss:0.3166 | F1:0.7905(@0.35)
  Early stopping at epoch 15


[I 2025-12-01 10:12:51,948] Trial 65 finished with value: 0.7912472525164849 and parameters: {'dataset_size': '3x', 'lr': 0.0001615947092565493, 'batch_size': 128, 'optimizer': 'Adam', 'weight_decay': 3.915328752554082e-05, 'dropout': 0.3211198346948305, 'freeze_ratio': 0.49331470856288406, 'augment_magnitude': 14, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.21094588714553789}. Best is trial 44 with value: 0.7970972008722697.



Trial 66: size=3x, lr=5.55e-04, batch=128, mag=15, freeze=0.11
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4520 | F1:0.7605(@0.30)




Epoch 2: Loss:0.4097 | F1:0.7689(@0.45)




Epoch 3: Loss:0.4014 | F1:0.7690(@0.50)




Epoch 4: Loss:0.3947 | F1:0.7739(@0.35)




Epoch 5: Loss:0.3918 | F1:0.7769(@0.35)




Epoch 6: Loss:0.3894 | F1:0.7780(@0.35)




Epoch 7: Loss:0.3843 | F1:0.7812(@0.45)




Epoch 8: Loss:0.3788 | F1:0.7798(@0.30)




Epoch 9: Loss:0.3739 | F1:0.7841(@0.50)




Epoch 10: Loss:0.3727 | F1:0.7805(@0.35)


[I 2025-12-01 10:23:29,078] Trial 66 pruned. 


Epoch 11: Loss:0.3662 | F1:0.7826(@0.30)

Trial 67: size=3x, lr=1.23e-04, batch=64, mag=10, freeze=0.08
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4179 | F1:0.7783(@0.35)




Epoch 2: Loss:0.3783 | F1:0.7831(@0.40)




Epoch 3: Loss:0.3647 | F1:0.7893(@0.30)




Epoch 4: Loss:0.3521 | F1:0.7912(@0.35)




Epoch 5: Loss:0.3421 | F1:0.7914(@0.45)




Epoch 6: Loss:0.3359 | F1:0.7930(@0.40)




Epoch 7: Loss:0.3283 | F1:0.7937(@0.45)




Epoch 8: Loss:0.3200 | F1:0.7932(@0.30)




Epoch 9: Loss:0.3137 | F1:0.7915(@0.30)




Epoch 10: Loss:0.3073 | F1:0.7921(@0.45)




Epoch 11: Loss:0.2990 | F1:0.7918(@0.30)




Epoch 12: Loss:0.2936 | F1:0.7954(@0.30)




Epoch 13: Loss:0.2891 | F1:0.7928(@0.30)




Epoch 14: Loss:0.2820 | F1:0.7906(@0.35)




Epoch 15: Loss:0.2785 | F1:0.7915(@0.25)




Epoch 16: Loss:0.2731 | F1:0.7877(@0.35)




Epoch 17: Loss:0.2679 | F1:0.7889(@0.35)
  Early stopping at epoch 17


[I 2025-12-01 10:50:15,833] Trial 67 finished with value: 0.795385617527369 and parameters: {'dataset_size': '3x', 'lr': 0.00012301620827618765, 'batch_size': 64, 'optimizer': 'Adam', 'weight_decay': 4.729336356530234e-05, 'dropout': 0.2930502069929105, 'freeze_ratio': 0.08445328159210788, 'augment_magnitude': 10, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.12288842142315112}. Best is trial 44 with value: 0.7970972008722697.



Trial 68: size=3x, lr=1.26e-04, batch=64, mag=10, freeze=0.08




Epoch 1: Loss:0.4168 | F1:0.7769(@0.50)




Epoch 2: Loss:0.3772 | F1:0.7877(@0.40)




Epoch 3: Loss:0.3613 | F1:0.7893(@0.35)




Epoch 4: Loss:0.3514 | F1:0.7926(@0.40)




Epoch 5: Loss:0.3437 | F1:0.7931(@0.40)




Epoch 6: Loss:0.3345 | F1:0.7939(@0.30)




Epoch 7: Loss:0.3272 | F1:0.7968(@0.30)




Epoch 8: Loss:0.3202 | F1:0.7952(@0.40)




Epoch 9: Loss:0.3136 | F1:0.7980(@0.35)




Epoch 10: Loss:0.3061 | F1:0.7956(@0.35)




Epoch 11: Loss:0.3014 | F1:0.7951(@0.35)




Epoch 12: Loss:0.2949 | F1:0.7918(@0.25)




Epoch 13: Loss:0.2886 | F1:0.7948(@0.30)




Epoch 14: Loss:0.2821 | F1:0.7916(@0.30)
  Early stopping at epoch 14


[I 2025-12-01 11:14:21,803] Trial 68 finished with value: 0.7980145682457112 and parameters: {'dataset_size': '3x', 'lr': 0.00012611787395820276, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 5.1175849211101515e-05, 'dropout': 0.26910236555388667, 'freeze_ratio': 0.07903109345005047, 'augment_magnitude': 10, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.12633828901007427}. Best is trial 68 with value: 0.7980145682457112.



Trial 69: size=3x, lr=7.22e-05, batch=64, mag=10, freeze=0.07




Epoch 1: Loss:0.4413 | F1:0.7708(@0.40)




Epoch 2: Loss:0.3892 | F1:0.7820(@0.30)




Epoch 3: Loss:0.3751 | F1:0.7848(@0.35)




Epoch 4: Loss:0.3662 | F1:0.7888(@0.45)




Epoch 5: Loss:0.3602 | F1:0.7872(@0.40)




Epoch 6: Loss:0.3539 | F1:0.7916(@0.40)




Epoch 7: Loss:0.3496 | F1:0.7920(@0.45)




Epoch 8: Loss:0.3437 | F1:0.7918(@0.45)




Epoch 9: Loss:0.3420 | F1:0.7936(@0.35)




Epoch 10: Loss:0.3386 | F1:0.7932(@0.35)




Epoch 11: Loss:0.3347 | F1:0.7941(@0.40)




Epoch 12: Loss:0.3315 | F1:0.7947(@0.35)




Epoch 13: Loss:0.3286 | F1:0.7934(@0.45)




Epoch 14: Loss:0.3263 | F1:0.7939(@0.30)




Epoch 15: Loss:0.3248 | F1:0.7940(@0.35)




Epoch 16: Loss:0.3223 | F1:0.7921(@0.30)




Epoch 17: Loss:0.3206 | F1:0.7940(@0.40)
  Early stopping at epoch 17


[I 2025-12-01 11:42:29,350] Trial 69 finished with value: 0.7946665534300091 and parameters: {'dataset_size': '3x', 'lr': 7.22115259123543e-05, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 5.011013873630503e-05, 'dropout': 0.29167944774372767, 'freeze_ratio': 0.07057771483541402, 'augment_magnitude': 10, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 256, 'head_dropout': 0.12163680316388037}. Best is trial 68 with value: 0.7980145682457112.



Trial 70: size=2x, lr=8.96e-05, batch=64, mag=9, freeze=0.23
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.4330 | F1:0.7702(@0.30)




Epoch 2: Loss:0.3882 | F1:0.7800(@0.40)




Epoch 3: Loss:0.3725 | F1:0.7825(@0.40)




Epoch 4: Loss:0.3628 | F1:0.7871(@0.35)




Epoch 5: Loss:0.3538 | F1:0.7877(@0.25)




Epoch 6: Loss:0.3475 | F1:0.7895(@0.35)




Epoch 7: Loss:0.3412 | F1:0.7901(@0.45)




Epoch 8: Loss:0.3348 | F1:0.7901(@0.40)




Epoch 9: Loss:0.3301 | F1:0.7879(@0.35)




Epoch 10: Loss:0.3215 | F1:0.7872(@0.30)


[I 2025-12-01 11:54:45,293] Trial 70 pruned. 


Epoch 11: Loss:0.3161 | F1:0.7846(@0.30)

Trial 71: size=3x, lr=1.28e-04, batch=64, mag=11, freeze=0.17
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.4210 | F1:0.7789(@0.35)




Epoch 2: Loss:0.3793 | F1:0.7871(@0.35)




Epoch 3: Loss:0.3655 | F1:0.7870(@0.35)




Epoch 4: Loss:0.3555 | F1:0.7902(@0.35)




Epoch 5: Loss:0.3471 | F1:0.7919(@0.30)




Epoch 6: Loss:0.3394 | F1:0.7940(@0.40)




Epoch 7: Loss:0.3322 | F1:0.7925(@0.25)




Epoch 8: Loss:0.3263 | F1:0.7933(@0.30)




Epoch 9: Loss:0.3201 | F1:0.7954(@0.35)




Epoch 10: Loss:0.3129 | F1:0.7938(@0.40)




Epoch 11: Loss:0.3081 | F1:0.7970(@0.40)




Epoch 12: Loss:0.3036 | F1:0.7929(@0.35)




Epoch 13: Loss:0.2985 | F1:0.7903(@0.35)




Epoch 14: Loss:0.2928 | F1:0.7913(@0.30)




Epoch 15: Loss:0.2871 | F1:0.7907(@0.35)




Epoch 16: Loss:0.2833 | F1:0.7922(@0.30)
  Early stopping at epoch 16


[I 2025-12-01 12:18:00,944] Trial 71 finished with value: 0.7969610636277303 and parameters: {'dataset_size': '3x', 'lr': 0.00012807901932729139, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 5.533986763998879e-05, 'dropout': 0.24861810427207504, 'freeze_ratio': 0.17359204570303038, 'augment_magnitude': 11, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.10410785770668445}. Best is trial 68 with value: 0.7980145682457112.



Trial 72: size=3x, lr=1.20e-04, batch=64, mag=11, freeze=0.15
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4214 | F1:0.7748(@0.45)




Epoch 2: Loss:0.3812 | F1:0.7869(@0.45)




Epoch 3: Loss:0.3659 | F1:0.7915(@0.35)




Epoch 4: Loss:0.3560 | F1:0.7928(@0.40)




Epoch 5: Loss:0.3467 | F1:0.7924(@0.40)




Epoch 6: Loss:0.3398 | F1:0.7943(@0.45)




Epoch 7: Loss:0.3320 | F1:0.7936(@0.45)




Epoch 8: Loss:0.3271 | F1:0.7952(@0.45)




Epoch 9: Loss:0.3208 | F1:0.7936(@0.30)




Epoch 10: Loss:0.3151 | F1:0.7976(@0.40)




Epoch 11: Loss:0.3090 | F1:0.7956(@0.25)




Epoch 12: Loss:0.3026 | F1:0.7929(@0.30)




Epoch 13: Loss:0.2989 | F1:0.7913(@0.25)




Epoch 14: Loss:0.2938 | F1:0.7911(@0.20)




Epoch 15: Loss:0.2894 | F1:0.7924(@0.30)
  Early stopping at epoch 15


[I 2025-12-01 12:42:10,118] Trial 72 finished with value: 0.7976294474068587 and parameters: {'dataset_size': '3x', 'lr': 0.00012047929193760436, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 5.4572242687989935e-05, 'dropout': 0.24552911995199328, 'freeze_ratio': 0.15324475724511796, 'augment_magnitude': 11, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.10282996073268898}. Best is trial 68 with value: 0.7980145682457112.



Trial 73: size=3x, lr=1.10e-04, batch=64, mag=10, freeze=0.17
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.4201 | F1:0.7759(@0.35)




Epoch 2: Loss:0.3800 | F1:0.7876(@0.40)




Epoch 3: Loss:0.3660 | F1:0.7900(@0.40)




Epoch 4: Loss:0.3561 | F1:0.7919(@0.45)




Epoch 5: Loss:0.3470 | F1:0.7930(@0.40)




Epoch 6: Loss:0.3413 | F1:0.7957(@0.40)




Epoch 7: Loss:0.3340 | F1:0.7968(@0.40)




Epoch 8: Loss:0.3271 | F1:0.7962(@0.35)




Epoch 9: Loss:0.3226 | F1:0.7943(@0.40)




Epoch 10: Loss:0.3157 | F1:0.7954(@0.35)




Epoch 11: Loss:0.3109 | F1:0.7901(@0.30)




Epoch 12: Loss:0.3047 | F1:0.7945(@0.25)
  Early stopping at epoch 12


[I 2025-12-01 12:59:36,752] Trial 73 finished with value: 0.7967748720465541 and parameters: {'dataset_size': '3x', 'lr': 0.00010983206349977021, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 5.3890724993548635e-05, 'dropout': 0.2454197144921044, 'freeze_ratio': 0.16906423366799417, 'augment_magnitude': 10, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.10563600838538884}. Best is trial 68 with value: 0.7980145682457112.



Trial 74: size=3x, lr=5.73e-05, batch=64, mag=10, freeze=0.17
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.4586 | F1:0.7654(@0.40)




Epoch 2: Loss:0.4033 | F1:0.7734(@0.35)




Epoch 3: Loss:0.3924 | F1:0.7775(@0.40)




Epoch 4: Loss:0.3856 | F1:0.7804(@0.45)




Epoch 5: Loss:0.3806 | F1:0.7823(@0.45)




Epoch 6: Loss:0.3759 | F1:0.7853(@0.40)




Epoch 7: Loss:0.3728 | F1:0.7864(@0.45)




Epoch 8: Loss:0.3701 | F1:0.7868(@0.35)




Epoch 9: Loss:0.3688 | F1:0.7878(@0.40)




Epoch 10: Loss:0.3650 | F1:0.7871(@0.40)


[I 2025-12-01 13:15:48,734] Trial 74 pruned. 


Epoch 11: Loss:0.3644 | F1:0.7873(@0.40)

Trial 75: size=3x, lr=1.19e-04, batch=64, mag=11, freeze=0.14
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4209 | F1:0.7735(@0.30)




Epoch 2: Loss:0.3811 | F1:0.7843(@0.40)




Epoch 3: Loss:0.3671 | F1:0.7874(@0.35)




Epoch 4: Loss:0.3555 | F1:0.7910(@0.40)




Epoch 5: Loss:0.3473 | F1:0.7937(@0.40)




Epoch 6: Loss:0.3410 | F1:0.7910(@0.40)




Epoch 7: Loss:0.3335 | F1:0.7921(@0.30)




Epoch 8: Loss:0.3267 | F1:0.7961(@0.40)




Epoch 9: Loss:0.3214 | F1:0.7920(@0.40)




Epoch 10: Loss:0.3154 | F1:0.7961(@0.30)




Epoch 11: Loss:0.3095 | F1:0.7957(@0.25)




Epoch 12: Loss:0.3043 | F1:0.7952(@0.35)




Epoch 13: Loss:0.3002 | F1:0.7960(@0.35)




Epoch 14: Loss:0.2938 | F1:0.7941(@0.35)




Epoch 15: Loss:0.2915 | F1:0.7923(@0.40)
  Early stopping at epoch 15


[I 2025-12-01 13:39:04,036] Trial 75 finished with value: 0.7961449734567386 and parameters: {'dataset_size': '3x', 'lr': 0.00011887616281506264, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 9.863463029431092e-05, 'dropout': 0.25732847253135244, 'freeze_ratio': 0.1401247070798874, 'augment_magnitude': 11, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.1128762587023949}. Best is trial 68 with value: 0.7980145682457112.



Trial 76: size=3x, lr=1.13e-04, batch=64, mag=11, freeze=0.15
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4214 | F1:0.7792(@0.45)




Epoch 2: Loss:0.3796 | F1:0.7876(@0.40)




Epoch 3: Loss:0.3665 | F1:0.7909(@0.40)




Epoch 4: Loss:0.3553 | F1:0.7919(@0.40)




Epoch 5: Loss:0.3473 | F1:0.7936(@0.40)




Epoch 6: Loss:0.3401 | F1:0.7930(@0.40)




Epoch 7: Loss:0.3350 | F1:0.7951(@0.45)




Epoch 8: Loss:0.3276 | F1:0.7958(@0.40)




Epoch 9: Loss:0.3220 | F1:0.7950(@0.35)




Epoch 10: Loss:0.3177 | F1:0.7948(@0.40)




Epoch 11: Loss:0.3121 | F1:0.7948(@0.30)




Epoch 12: Loss:0.3081 | F1:0.7949(@0.30)




Epoch 13: Loss:0.3019 | F1:0.7911(@0.35)
  Early stopping at epoch 13


[I 2025-12-01 13:59:13,989] Trial 76 finished with value: 0.795812316390444 and parameters: {'dataset_size': '3x', 'lr': 0.0001131483246682819, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 0.00011583487257140692, 'dropout': 0.25158039653923303, 'freeze_ratio': 0.14920297983951197, 'augment_magnitude': 11, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.1150958067555352}. Best is trial 68 with value: 0.7980145682457112.



Trial 77: size=3x, lr=3.86e-05, batch=64, mag=11, freeze=0.24
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.5041 | F1:0.7521(@0.40)




Epoch 2: Loss:0.4211 | F1:0.7624(@0.45)




Epoch 3: Loss:0.4070 | F1:0.7675(@0.40)




Epoch 4: Loss:0.4001 | F1:0.7713(@0.45)




Epoch 5: Loss:0.3954 | F1:0.7736(@0.40)




Epoch 6: Loss:0.3922 | F1:0.7752(@0.35)




Epoch 7: Loss:0.3894 | F1:0.7768(@0.40)




Epoch 8: Loss:0.3877 | F1:0.7772(@0.45)




Epoch 9: Loss:0.3847 | F1:0.7782(@0.40)




Epoch 10: Loss:0.3848 | F1:0.7792(@0.40)


[I 2025-12-01 14:15:18,416] Trial 77 pruned. 


Epoch 11: Loss:0.3828 | F1:0.7794(@0.45)

Trial 78: size=3x, lr=7.77e-05, batch=64, mag=10, freeze=0.18
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.4337 | F1:0.7714(@0.40)




Epoch 2: Loss:0.3865 | F1:0.7813(@0.45)




Epoch 3: Loss:0.3755 | F1:0.7834(@0.45)




Epoch 4: Loss:0.3663 | F1:0.7864(@0.35)




Epoch 5: Loss:0.3613 | F1:0.7885(@0.45)




Epoch 6: Loss:0.3563 | F1:0.7868(@0.40)




Epoch 7: Loss:0.3515 | F1:0.7890(@0.35)




Epoch 8: Loss:0.3477 | F1:0.7911(@0.35)




Epoch 9: Loss:0.3439 | F1:0.7908(@0.40)




Epoch 10: Loss:0.3400 | F1:0.7915(@0.45)




Epoch 11: Loss:0.3376 | F1:0.7917(@0.50)


[I 2025-12-01 14:32:46,181] Trial 78 pruned. 


Epoch 12: Loss:0.3350 | F1:0.7904(@0.40)

Trial 79: size=1x, lr=1.30e-04, batch=64, mag=8, freeze=0.20
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.4507 | F1:0.7603(@0.35)




Epoch 2: Loss:0.3958 | F1:0.7713(@0.50)




Epoch 3: Loss:0.3794 | F1:0.7766(@0.50)




Epoch 4: Loss:0.3695 | F1:0.7788(@0.35)




Epoch 5: Loss:0.3581 | F1:0.7836(@0.40)




Epoch 6: Loss:0.3420 | F1:0.7801(@0.40)




Epoch 7: Loss:0.3325 | F1:0.7828(@0.40)




Epoch 8: Loss:0.3240 | F1:0.7827(@0.40)




Epoch 9: Loss:0.3123 | F1:0.7838(@0.35)




Epoch 10: Loss:0.3018 | F1:0.7806(@0.30)


[I 2025-12-01 14:39:25,343] Trial 79 pruned. 


Epoch 11: Loss:0.2912 | F1:0.7733(@0.30)

Trial 80: size=3x, lr=6.58e-05, batch=64, mag=11, freeze=0.09
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4425 | F1:0.7694(@0.45)




Epoch 2: Loss:0.3938 | F1:0.7780(@0.40)




Epoch 3: Loss:0.3810 | F1:0.7822(@0.40)




Epoch 4: Loss:0.3742 | F1:0.7864(@0.45)




Epoch 5: Loss:0.3686 | F1:0.7867(@0.45)




Epoch 6: Loss:0.3637 | F1:0.7863(@0.40)




Epoch 7: Loss:0.3599 | F1:0.7890(@0.40)




Epoch 8: Loss:0.3557 | F1:0.7873(@0.40)




Epoch 9: Loss:0.3533 | F1:0.7906(@0.45)




Epoch 10: Loss:0.3518 | F1:0.7899(@0.45)


[I 2025-12-01 14:56:34,169] Trial 80 pruned. 


Epoch 11: Loss:0.3490 | F1:0.7901(@0.40)

Trial 81: size=3x, lr=9.20e-05, batch=64, mag=9, freeze=0.14
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4194 | F1:0.7761(@0.40)




Epoch 2: Loss:0.3794 | F1:0.7857(@0.30)




Epoch 3: Loss:0.3637 | F1:0.7892(@0.45)




Epoch 4: Loss:0.3567 | F1:0.7921(@0.40)




Epoch 5: Loss:0.3471 | F1:0.7923(@0.40)




Epoch 6: Loss:0.3405 | F1:0.7939(@0.35)




Epoch 7: Loss:0.3350 | F1:0.7937(@0.40)




Epoch 8: Loss:0.3293 | F1:0.7923(@0.45)




Epoch 9: Loss:0.3247 | F1:0.7925(@0.35)




Epoch 10: Loss:0.3192 | F1:0.7940(@0.40)




Epoch 11: Loss:0.3147 | F1:0.7921(@0.30)




Epoch 12: Loss:0.3103 | F1:0.7937(@0.45)




Epoch 13: Loss:0.3059 | F1:0.7921(@0.30)




Epoch 14: Loss:0.3018 | F1:0.7922(@0.45)




Epoch 15: Loss:0.2991 | F1:0.7908(@0.35)
  Early stopping at epoch 15


[I 2025-12-01 15:19:58,388] Trial 81 finished with value: 0.7939683593005443 and parameters: {'dataset_size': '3x', 'lr': 9.202687925503917e-05, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 2.6830224600466703e-05, 'dropout': 0.24945951621223006, 'freeze_ratio': 0.13621063305284867, 'augment_magnitude': 9, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.11231363538555428}. Best is trial 68 with value: 0.7980145682457112.



Trial 82: size=3x, lr=1.14e-04, batch=64, mag=11, freeze=0.14
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4186 | F1:0.7817(@0.40)




Epoch 2: Loss:0.3779 | F1:0.7864(@0.40)




Epoch 3: Loss:0.3641 | F1:0.7917(@0.35)




Epoch 4: Loss:0.3541 | F1:0.7907(@0.40)




Epoch 5: Loss:0.3465 | F1:0.7894(@0.30)




Epoch 6: Loss:0.3386 | F1:0.7929(@0.40)




Epoch 7: Loss:0.3333 | F1:0.7953(@0.40)




Epoch 8: Loss:0.3267 | F1:0.7961(@0.40)




Epoch 9: Loss:0.3211 | F1:0.7928(@0.35)




Epoch 10: Loss:0.3171 | F1:0.7919(@0.25)




Epoch 11: Loss:0.3114 | F1:0.7945(@0.35)




Epoch 12: Loss:0.3058 | F1:0.7963(@0.35)




Epoch 13: Loss:0.3007 | F1:0.7912(@0.30)




Epoch 14: Loss:0.2954 | F1:0.7900(@0.30)




Epoch 15: Loss:0.2920 | F1:0.7891(@0.30)




Epoch 16: Loss:0.2877 | F1:0.7923(@0.30)




Epoch 17: Loss:0.2835 | F1:0.7897(@0.40)
  Early stopping at epoch 17


[I 2025-12-01 15:46:20,268] Trial 82 finished with value: 0.7962637160763366 and parameters: {'dataset_size': '3x', 'lr': 0.00011433272958518131, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 0.00014082251801493471, 'dropout': 0.25471676280809047, 'freeze_ratio': 0.1362433364130634, 'augment_magnitude': 11, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.11584572008753602}. Best is trial 68 with value: 0.7980145682457112.



Trial 83: size=3x, lr=1.04e-04, batch=64, mag=11, freeze=0.19
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.4247 | F1:0.7765(@0.45)




Epoch 2: Loss:0.3841 | F1:0.7853(@0.40)




Epoch 3: Loss:0.3713 | F1:0.7874(@0.45)




Epoch 4: Loss:0.3616 | F1:0.7878(@0.45)




Epoch 5: Loss:0.3537 | F1:0.7896(@0.40)




Epoch 6: Loss:0.3461 | F1:0.7923(@0.35)




Epoch 7: Loss:0.3413 | F1:0.7933(@0.35)




Epoch 8: Loss:0.3354 | F1:0.7924(@0.30)




Epoch 9: Loss:0.3308 | F1:0.7918(@0.40)




Epoch 10: Loss:0.3255 | F1:0.7938(@0.40)




Epoch 11: Loss:0.3214 | F1:0.7891(@0.45)




Epoch 12: Loss:0.3173 | F1:0.7926(@0.30)




Epoch 13: Loss:0.3127 | F1:0.7915(@0.30)




Epoch 14: Loss:0.3089 | F1:0.7933(@0.35)




Epoch 15: Loss:0.3036 | F1:0.7935(@0.40)
  Early stopping at epoch 15


[I 2025-12-01 16:08:18,266] Trial 83 finished with value: 0.7938150916874321 and parameters: {'dataset_size': '3x', 'lr': 0.00010422308669877469, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 0.00026122067145021297, 'dropout': 0.27635030082440054, 'freeze_ratio': 0.19185962910607438, 'augment_magnitude': 11, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.1615891446430471}. Best is trial 68 with value: 0.7980145682457112.



Trial 84: size=3x, lr=1.30e-04, batch=64, mag=10, freeze=0.03




Epoch 1: Loss:0.4181 | F1:0.7787(@0.35)




Epoch 2: Loss:0.3773 | F1:0.7863(@0.35)




Epoch 3: Loss:0.3625 | F1:0.7884(@0.40)




Epoch 4: Loss:0.3517 | F1:0.7922(@0.35)




Epoch 5: Loss:0.3431 | F1:0.7923(@0.35)




Epoch 6: Loss:0.3346 | F1:0.7923(@0.35)




Epoch 7: Loss:0.3280 | F1:0.7932(@0.35)




Epoch 8: Loss:0.3205 | F1:0.7919(@0.40)




Epoch 9: Loss:0.3147 | F1:0.7932(@0.35)




Epoch 10: Loss:0.3058 | F1:0.7917(@0.30)




Epoch 11: Loss:0.3014 | F1:0.7913(@0.30)




Epoch 12: Loss:0.2938 | F1:0.7928(@0.25)
  Early stopping at epoch 12


[I 2025-12-01 16:28:25,072] Trial 84 finished with value: 0.7932226045903131 and parameters: {'dataset_size': '3x', 'lr': 0.00013042155439041162, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 0.00017882096640658824, 'dropout': 0.2582496314555172, 'freeze_ratio': 0.03126377028350432, 'augment_magnitude': 10, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.11834380953130771}. Best is trial 68 with value: 0.7980145682457112.



Trial 85: size=3x, lr=2.01e-04, batch=64, mag=11, freeze=0.26
  Freezing 3/12 Transformer blocks




Epoch 1: Loss:0.4177 | F1:0.7776(@0.50)




Epoch 2: Loss:0.3815 | F1:0.7872(@0.45)




Epoch 3: Loss:0.3690 | F1:0.7873(@0.40)




Epoch 4: Loss:0.3574 | F1:0.7895(@0.35)




Epoch 5: Loss:0.3485 | F1:0.7931(@0.40)




Epoch 6: Loss:0.3407 | F1:0.7927(@0.40)




Epoch 7: Loss:0.3334 | F1:0.7906(@0.40)




Epoch 8: Loss:0.3257 | F1:0.7875(@0.45)




Epoch 9: Loss:0.3193 | F1:0.7865(@0.40)




Epoch 10: Loss:0.3120 | F1:0.7884(@0.20)
  Early stopping at epoch 10


[I 2025-12-01 16:42:29,433] Trial 85 finished with value: 0.7931339210598873 and parameters: {'dataset_size': '3x', 'lr': 0.0002007469305421877, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 0.0001230300558504423, 'dropout': 0.24350347857400348, 'freeze_ratio': 0.2630147856515259, 'augment_magnitude': 11, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 384, 'head_dropout': 0.13224282491373227}. Best is trial 68 with value: 0.7980145682457112.



Trial 86: size=3x, lr=2.62e-04, batch=64, mag=11, freeze=0.15
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4197 | F1:0.7698(@0.35)




Epoch 2: Loss:0.3861 | F1:0.7835(@0.35)




Epoch 3: Loss:0.3723 | F1:0.7887(@0.45)




Epoch 4: Loss:0.3634 | F1:0.7856(@0.40)




Epoch 5: Loss:0.3551 | F1:0.7887(@0.40)




Epoch 6: Loss:0.3496 | F1:0.7895(@0.35)




Epoch 7: Loss:0.3413 | F1:0.7899(@0.30)




Epoch 8: Loss:0.3327 | F1:0.7923(@0.45)




Epoch 9: Loss:0.3280 | F1:0.7895(@0.35)




Epoch 10: Loss:0.3202 | F1:0.7928(@0.30)




Epoch 11: Loss:0.3130 | F1:0.7934(@0.40)




Epoch 12: Loss:0.3041 | F1:0.7916(@0.35)




Epoch 13: Loss:0.2990 | F1:0.7919(@0.25)




Epoch 14: Loss:0.2929 | F1:0.7909(@0.25)




Epoch 15: Loss:0.2864 | F1:0.7898(@0.35)




Epoch 16: Loss:0.2810 | F1:0.7899(@0.25)
  Early stopping at epoch 16


[I 2025-12-01 17:07:43,917] Trial 86 finished with value: 0.7934374655039188 and parameters: {'dataset_size': '3x', 'lr': 0.00026218348686681784, 'batch_size': 64, 'optimizer': 'AdamW', 'weight_decay': 5.052604579823317e-05, 'dropout': 0.20849764157929074, 'freeze_ratio': 0.15290048262407577, 'augment_magnitude': 11, 'scheduler': 'cosine', 'head_layers': 1, 'head_hidden_dim': 256, 'head_dropout': 0.10900528118672179}. Best is trial 68 with value: 0.7980145682457112.



Trial 87: size=2x, lr=1.35e-04, batch=64, mag=10, freeze=0.10
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4226 | F1:0.7709(@0.45)




Epoch 2: Loss:0.3829 | F1:0.7844(@0.40)




Epoch 3: Loss:0.3660 | F1:0.7866(@0.45)




Epoch 4: Loss:0.3544 | F1:0.7890(@0.40)




Epoch 5: Loss:0.3458 | F1:0.7883(@0.35)




Epoch 6: Loss:0.3332 | F1:0.7887(@0.35)




Epoch 7: Loss:0.3251 | F1:0.7913(@0.45)




Epoch 8: Loss:0.3157 | F1:0.7891(@0.30)




Epoch 9: Loss:0.3051 | F1:0.7909(@0.25)




Epoch 10: Loss:0.2967 | F1:0.7890(@0.35)


[I 2025-12-01 17:19:50,038] Trial 87 pruned. 


Epoch 11: Loss:0.2857 | F1:0.7889(@0.25)

Trial 88: size=3x, lr=5.16e-05, batch=64, mag=12, freeze=0.13
  Freezing 1/12 Transformer blocks




Epoch 1: Loss:0.4566 | F1:0.7658(@0.45)




Epoch 2: Loss:0.4035 | F1:0.7722(@0.40)




Epoch 3: Loss:0.3920 | F1:0.7772(@0.35)




Epoch 4: Loss:0.3848 | F1:0.7797(@0.40)




Epoch 5: Loss:0.3787 | F1:0.7819(@0.45)




Epoch 6: Loss:0.3740 | F1:0.7837(@0.30)




Epoch 7: Loss:0.3707 | F1:0.7844(@0.40)




Epoch 8: Loss:0.3691 | F1:0.7851(@0.40)




Epoch 9: Loss:0.3664 | F1:0.7851(@0.40)




Epoch 10: Loss:0.3652 | F1:0.7863(@0.40)


[I 2025-12-01 17:36:59,689] Trial 88 pruned. 


Epoch 11: Loss:0.3627 | F1:0.7879(@0.40)

Trial 89: size=3x, lr=3.05e-04, batch=64, mag=10, freeze=0.05




Epoch 1: Loss:0.4349 | F1:0.7669(@0.40)




Epoch 2: Loss:0.3979 | F1:0.7727(@0.40)




Epoch 3: Loss:0.3878 | F1:0.7727(@0.40)




Epoch 4: Loss:0.3796 | F1:0.7817(@0.45)




Epoch 5: Loss:0.3727 | F1:0.7850(@0.45)




Epoch 6: Loss:0.3692 | F1:0.7861(@0.35)




Epoch 7: Loss:0.3630 | F1:0.7904(@0.30)




Epoch 8: Loss:0.3564 | F1:0.7902(@0.45)




Epoch 9: Loss:0.3503 | F1:0.7879(@0.45)




Epoch 10: Loss:0.3456 | F1:0.7906(@0.40)


[I 2025-12-01 17:55:27,866] Trial 89 pruned. 


Epoch 11: Loss:0.3400 | F1:0.7891(@0.35)

Trial 90: size=3x, lr=1.03e-04, batch=64, mag=9, freeze=0.22
  Freezing 2/12 Transformer blocks




Epoch 1: Loss:0.4264 | F1:0.7759(@0.40)




Epoch 2: Loss:0.3838 | F1:0.7824(@0.35)




Epoch 3: Loss:0.3684 | F1:0.7889(@0.40)




Epoch 4: Loss:0.3591 | F1:0.7891(@0.40)




Epoch 5: Loss:0.3514 | F1:0.7928(@0.35)


Val:  34%|███▍      | 53/157 [00:04<00:04, 20.86it/s]

In [None]:
# Diagnostic cell: remount Google Drive, then report whether the 224px NPZ
# dataset directory exists and how many .npz files it holds.
from pathlib import Path

from google.colab import drive

# 1. Remount Google Drive: flush/unmount any existing mount, then force a fresh one
drive.flush_and_unmount()
drive.mount('/content/drive', force_remount=True)

# 2. Verify the dataset path
base_dir = '/content/drive/MyDrive/XRAIN/yano/20250601~20251020_dataset/'
path_224 = Path(base_dir).joinpath('npz_datasets_v4', 'npz_datasets_v4_universal_224px')
print(f"224px path: {path_224}")
print(f"Exists: {path_224.exists()}")
if path_224.exists():
    # '*.npz' matches only files directly inside the directory (no recursion).
    files = [*path_224.glob('*.npz')]
    print(f"File count: {len(files)}")

Mounted at /content/drive
224px path: /content/drive/MyDrive/XRAIN/yano/20250601~20251020_dataset/npz_datasets_v4/npz_datasets_v4_universal_224px
Exists: True
File count: 80
