In [1]:

from pathlib import Path
import shutil
import zipfile

# Slug of the Kaggle input dataset (the "slug" part of username/slug).
DATASET_SLUG = 'gns-codes'
DATASET_ROOT = Path(f'/kaggle/input/{DATASET_SLUG}')
WORK_ROOT = Path('/kaggle/working')
# Working copy of the code; the rest of this cell populates it.
repo_dir = WORK_ROOT / 'code'

code_dir = DATASET_ROOT / 'code'
code_zip = DATASET_ROOT / 'code.zip'

# Prefer an already-extracted code/ directory; otherwise extract code.zip.
if code_dir.exists():
    src = code_dir
elif code_zip.exists():
    # Remove any previous working copy before extracting into repo_dir.
    if repo_dir.exists():
        shutil.rmtree(repo_dir)
    with zipfile.ZipFile(code_zip) as zf:
        zf.extractall(repo_dir)
    src = repo_dir
else:
    raise FileNotFoundError(f"/kaggle/input/{DATASET_SLUG} に code も code.zip もありません。Add Data で {DATASET_SLUG} を追加してください。")

# When the source is the extracted code/ directory, replace repo_dir with a
# fresh copy of it (the zip branch has already written into repo_dir).
if src != repo_dir:
    if repo_dir.exists():
        shutil.rmtree(repo_dir)
    shutil.copytree(src, repo_dir)
# Flatten: if repo_dir contains exactly one entry and it is a directory
# (e.g. the archive wrapped everything in one extra level), move its
# children up into repo_dir and remove the now-empty wrapper.
if repo_dir.exists():
    children = list(repo_dir.iterdir())
    if len(children) == 1 and children[0].is_dir():
        inner = children[0]
        for p in inner.iterdir():
            p.rename(repo_dir / p.name)
        inner.rmdir()

%cd $repo_dir


FileNotFoundError: /kaggle/input/gns-codes に code も code.zip もありません。Add Data で gns-codes を追加してください。

In [None]:

# Install only torch-cluster, which PyG's radius_graph needs.
import torch

# Build the wheel-index URL from the installed torch version (local build
# suffix after '+' removed) and its CUDA version ('cpu' when CUDA is absent).
torch_ver = torch.__version__.split('+')[0]
cuda_ver = torch.version.cuda
cuda_tag = 'cpu' if cuda_ver is None else 'cu' + cuda_ver.replace('.', '')
# NOTE(review): the wheel host may publish indexes only for some torch
# versions (e.g. x.y.0) — confirm the printed URL resolves for patch releases.
url = f"https://data.pyg.org/whl/torch-{torch_ver}+{cuda_tag}.html"
print('PyG wheels:', url)

!pip -q install torch-cluster -f {url}
!pip -q install torch_geometric


In [None]:
from pathlib import Path
import copy
import yaml
import re
import json

# Working copy of the code and the rollout config that this notebook rewrites.
REPO = Path('/kaggle/working/code')
cfg_path = REPO / 'config_rollout.yaml'

# Switch for validating every model against one and the same dataset.
USE_FORCE_DATASET = False  # when True, FORCE_DATASET_ROOT below is used (if it exists)
FORCE_DATASET_ROOT = Path('/kaggle/input/dam-break-left-800')

# The helpers below automatically resolve the dataset that matches each model.

def _load_metadata(path: Path):
    try:
        with path.open('r', encoding='utf-8') as f:
            return json.load(f)
    except Exception:
        return None

def _find_metadata(root: Path):
    candidates = [
        root / 'metadata.json',
        root / 'metadata' / 'metadata.json',
    ]
    candidates += list(root.glob('**/metadata*.json'))
    for c in candidates:
        if c.is_file():
            meta = _load_metadata(c)
            if isinstance(meta, dict):
                return meta, c
    return None, None

def _extract_sequence_length(meta: dict):
    if not isinstance(meta, dict):
        return None
    for key in ('train', 'valid', 'test', 'rollout'):
        part = meta.get(key)
        if isinstance(part, dict) and 'sequence_length' in part:
            try:
                return int(part['sequence_length'])
            except Exception:
                pass
    if 'sequence_length' in meta:
        try:
            return int(meta['sequence_length'])
        except Exception:
            return None
    return None

def resolve_dataset_root(step: int | None):
    base = Path('/kaggle/input')
    if not base.exists():
        return None, False

    if USE_FORCE_DATASET:
        if FORCE_DATASET_ROOT.exists():
            print(f"[force] step={step} を無視して {FORCE_DATASET_ROOT} を使用します。")
            return FORCE_DATASET_ROOT, True
        else:
            print(f"[force] {FORCE_DATASET_ROOT} が見つかりません。通常の解決ロジックに戻ります。")

    # 1) ディレクトリ名にステップ数が含まれるものを優先
    if step is not None:
        for dataset_root in sorted(base.iterdir()):
            name = dataset_root.name
            if (name.endswith(f'-{step}') or name.endswith(f'_{step}')
                    or f'-{step}-' in name or f'_{step}_' in name):
                return dataset_root, False

    # 2) metadata の sequence_length で一致を探す
    for dataset_root in sorted(base.iterdir()):
        meta, meta_path = _find_metadata(dataset_root)
        if not meta:
            continue
        seq = _extract_sequence_length(meta)
        if step is not None and seq == step:
            return dataset_root, False

    # 3) フォールバック（もっとも汎用な800データセット）
    fallback = base / 'dam-break-left-800'
    if fallback.exists():
        print(f"[fallback] step={step} に一致するデータセットが見つからず、{fallback} を使用します。")
        return fallback, True
    print(f"[fallback] step={step} に一致するデータセットも既定の dam-break-left-800 も見つかりません。")
    return None, True

# Output settings (shared by all models).
output_root = REPO / 'rollouts'
viz_format = 'html'  # html|mp4|gif

# Load the rollout config shipped with the code so it can be adjusted below.
with cfg_path.open('r', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

# The dataset is swapped in per model by a later cell.
cfg['method'] = 'gns'
cfg['rollout_inference_max_examples'] = 1    # number of examples to run inference on
cfg['output_path'] = str(output_root)
cfg.setdefault('scenario_options', {}).setdefault('fluid', {})['dataset'] = '/kaggle/input'  # placeholder

# The model is swapped in per model by a later cell (the base is saved below).
cfg['model_path'] = str(REPO / 'models')
cfg['model_file'] = None
cfg['output_filename'] = 'rollout'

# Independent snapshot of the base settings; the evaluation loop deep-copies it per model.
BASE_CFG = copy.deepcopy(cfg)

# Persist the base settings back to the same config file.
with cfg_path.open('w', encoding='utf-8') as f:
    yaml.safe_dump(cfg, f, allow_unicode=True)

print('ベース設定を書き出しました:', cfg_path)
print('output_path:', cfg['output_path'])



In [None]:
%cd /kaggle/working/code

import copy
import re
import subprocess
from pathlib import Path
import numpy as np
import torch
import sys
sys.path.append(str(Path('/kaggle/working/code/src')))

from analyze_rollouts import analyze_rollout
from simulator_factory import _get_simulator
from train_config import load_config, INPUT_SEQUENCE_LENGTH, KINEMATIC_PARTICLE_ID
from train_paths import _resolve_model_path
from train_utils import _resolve_rollout_dataset_path
import data_loader
import reading_utils

# Gather the models to compare; models/dataset_diff is searched before models/.
model_roots = [
    REPO / 'models' / 'dataset_diff',
    REPO / 'models',
]
model_files: list[Path] = [
    found.resolve()
    for root in model_roots
    if root.exists()
    for found in root.glob('*.pt')
]

# Drop duplicate resolved paths, keeping the first occurrence of each
# (ordering by parent directory and step number is applied afterwards).
seen = set()
unique_models: list[Path] = []
for candidate in model_files:
    if candidate not in seen:
        seen.add(candidate)
        unique_models.append(candidate)

def _step_key(p: Path):
    m = re.search(r'(\d+)', p.stem)
    step = int(m.group(1)) if m else -1
    priority = 0 if p.parent.name == "dataset_diff" else 1
    return (priority, p.parent.name, step)

def _step_from_path(p: Path):
    m = re.search(r'(\d+)(?!.*\d)', p.stem)
    return int(m.group(1)) if m else None

# Final evaluation order, as defined by _step_key.
model_files = sorted(unique_models, key=_step_key)

# Stop early when there is nothing to evaluate.
if not model_files:
    raise FileNotFoundError('*.pt が見つかりません。models/ 下にモデルを配置してください。')

# List the models that will be evaluated.
print('評価対象モデル:')
for p in model_files:
    print(' -', p)


def compute_one_step_error(cfg_path: Path, device: torch.device):
    """Compute teacher-forced one-step position errors on the first usable trajectory.

    The config at ``cfg_path`` is loaded, a simulator is built and its weights
    are loaded, and for each timestep the model predicts the next positions
    from a window of ground-truth positions. Kinematic particles are
    overwritten with the ground truth before the error is measured, so they
    contribute zero distance to the per-step mean.

    Args:
        cfg_path: Path of the config file passed to ``load_config``.
        device: Torch device the simulator and the tensors are moved to.

    Returns:
        A dict with ``per_timestep`` (mean particle distance per step),
        ``mean_distance_error``, ``n_rollouts`` (always 1) and ``used_length``
        for the first trajectory that yields at least one step, or ``None``
        when no trajectory does.

    Raises:
        FileNotFoundError: If no rollout dataset path can be resolved.
    """
    cfg_obj = load_config(str(cfg_path))
    dataset_path = _resolve_rollout_dataset_path(cfg_obj)
    if dataset_path is None:
        raise FileNotFoundError(
            f"rollout/valid/test/train の npz が見つかりません: {cfg_obj.data_path}"
        )

    # Use the scenario's metadata split when one is configured, else 'rollout'.
    metadata_key = (
        cfg_obj.active_scenario.rollout_metadata_split
        if getattr(cfg_obj, 'active_scenario', None)
        and cfg_obj.active_scenario.rollout_metadata_split
        else 'rollout'
    )
    metadata = reading_utils.read_metadata(cfg_obj.data_path, metadata_key)
    # If the statistics are not at the top level, descend into a per-split section.
    if isinstance(metadata, dict) and 'acc_mean' not in metadata:
        metadata = metadata.get('rollout') or metadata.get('train') or metadata
    simulator = _get_simulator(
        metadata,
        cfg_obj.noise_std,
        cfg_obj.noise_std,
        device,
        cfg_obj,
    )
    model_file = _resolve_model_path(cfg_obj)
    simulator.load(model_file)
    simulator.to(device)
    simulator.eval()

    loader = data_loader.get_data_loader_by_trajectories(dataset_path)

    with torch.no_grad():
        for traj in loader:
            # Trajectories come as 4-tuples (with material_property) or 3-tuples (without).
            if len(traj) == 4:
                positions, particle_type, material_property, n_particles = traj
                material_property = material_property.to(device)
            else:
                positions, particle_type, n_particles = traj
                material_property = None

            positions = positions.to(device)
            particle_type = particle_type.to(device)
            n_particles_tensor = torch.tensor(
                [int(n_particles)], device=device, dtype=torch.int32
            )

            # Axis 1 of positions is indexed as time below; each step needs a full input window.
            total_steps = positions.shape[1] - INPUT_SEQUENCE_LENGTH
            if total_steps <= 0:
                continue

            # The mask depends only on particle_type, so build it once per
            # trajectory instead of once per timestep.
            kinematic_mask = (particle_type == KINEMATIC_PARTICLE_ID).to(device)
            kinematic_column = kinematic_mask[:, None]

            step_errors: list[float] = []
            for t in range(total_steps):
                window = positions[:, t : t + INPUT_SEQUENCE_LENGTH]
                target = positions[:, t + INPUT_SEQUENCE_LENGTH]

                pred = simulator.predict_positions(
                    window,
                    nparticles_per_example=n_particles_tensor,
                    particle_types=particle_type,
                    material_property=material_property,
                )

                # Kinematic particles are forced to the ground truth.
                pred = torch.where(kinematic_column, target, pred)

                # Euclidean distance per particle, averaged over particles.
                dist = torch.sqrt(((pred - target) ** 2).sum(dim=-1))
                step_errors.append(float(dist.mean().item()))

            if step_errors:
                # Only the first trajectory that produced steps is reported.
                return {
                    'per_timestep': step_errors,
                    'mean_distance_error': float(np.mean(step_errors)),
                    'n_rollouts': 1,
                    'used_length': len(step_errors),
                }

    return None


# One summary dict per evaluated model; the later plotting cells read this list.
results = []

# train.py is driven through the shared config file, which is rewritten for every model below.
cmd = ['python', 'src/train.py', '--config', str(cfg_path)]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for model_path in model_files:
    # NOTE(review): tag is only the file stem, so two models with the same file
    # name in different directories get the same output_filename/output_dir.
    tag = model_path.stem  # model name only, without the directory name
    step_num = _step_from_path(model_path)
    dataset_root, used_fallback = resolve_dataset_root(step_num)
    if dataset_root is None:
        raise FileNotFoundError(f"対応するデータセットが見つかりません（step={step_num}）")

    # Start from the base settings and point them at this model and dataset.
    cfg = copy.deepcopy(BASE_CFG)
    cfg['model_path'] = str(model_path.parent)
    cfg['model_file'] = str(model_path)
    cfg['output_filename'] = f"rollout_{tag}"
    cfg.setdefault('scenario_options', {}).setdefault('fluid', {})['dataset'] = str(dataset_root)
    cfg['data_path'] = str(dataset_root)
    cfg['rollout_dataset'] = 'valid'

    with cfg_path.open('w', encoding='utf-8') as f:
        yaml.safe_dump(cfg, f, allow_unicode=True)

    print(f"=== {tag} ===")
    print('dataset:', dataset_root)
    # NOTE(review): resolve_dataset_root also returns True here when the dataset
    # was forced via USE_FORCE_DATASET, so this warning is printed in that case too.
    if used_fallback:
        print('[warn] 指定ステップに一致するデータセットが見つからなかったためフォールバックを使用しました。')
    print('Running:', ' '.join(cmd))
    # check=True aborts the whole loop if the rollout subprocess fails.
    subprocess.run(cmd, check=True)

    # Rollout pickles are looked up under <output_path>/<method>/<output_filename>/.
    output_dir = Path(cfg['output_path']) / cfg['method'] / cfg['output_filename']
    pkl_files = sorted(output_dir.glob(f"{cfg['output_filename']}_ex*.pkl"))
    if not pkl_files:
        raise FileNotFoundError(f"{output_dir} に *_ex*.pkl がありません。推論が成功したか確認してください。")

    # Use the first trajectory only.
    res_one = analyze_rollout(pkl_files[0])
    dist = np.array(res_one['distance_error_per_timestep'])
    mean_full = float(dist.mean())

    # Teacher-forced one-step error, computed with the config just written for this model.
    one_step_res = compute_one_step_error(cfg_path, device)
    if one_step_res:
        print(
            f"  one-step (1軌跡): steps={one_step_res['used_length']} / mean distance error: {one_step_res['mean_distance_error']:.6f}"
        )
    else:
        print('  one-step: 計算できませんでした。')

    # One-step fields fall back to empty/None/0 when the one-step error is unavailable.
    results.append({
        'tag': tag,
        'distance_error_per_timestep': dist.tolist(),
        'mean_distance_error': mean_full,
        'n_rollouts': 1,
        'used_length': len(dist),
        'pkl_dir': str(output_dir),
        'dataset_root': str(dataset_root),
        'step_num': step_num,
        'used_fallback': used_fallback,
        'one_step_error_per_timestep': one_step_res['per_timestep'] if one_step_res else [],
        'one_step_mean_distance_error': one_step_res['mean_distance_error'] if one_step_res else None,
        'one_step_used_length': one_step_res['used_length'] if one_step_res else 0,
        'one_step_n_rollouts': one_step_res['n_rollouts'] if one_step_res else 0,
    })

    print(
        f"  rollout: steps={len(dist)} / mean distance error: {mean_full:.6f} (1軌跡のみ)"
    )


In [None]:

import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from itertools import cycle

# Number of leading timesteps used for the truncated mean in the summary.
MAX_SUMMARY_STEPS = 100
# None means no limit.
MAX_PLOT_STEPS = None  # display limit for the one-step error plot

# Font sizes for labels and legend (on the large side).
LABEL_FONTSIZE = 16
TICK_FONTSIZE = 14
LEGEND_FONTSIZE = 16
TITLE_FONTSIZE = 18

if not results:
    print('results が空です。前のセルを実行してください。')
else:
    # Assign a fixed colour to each model so the rollout and one-step plots match.
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key().get('color', [])
    if not color_cycle:
        color_cycle = list(plt.cm.tab10.colors)
    color_iter = cycle(color_cycle)
    color_map = {r['tag']: next(color_iter) for r in results}

    # -------- Distance error over all timesteps (rollout) --------
    plt.figure(figsize=(10, 6))
    max_len_rollout = max(len(np.array(r['distance_error_per_timestep'])) for r in results)
    for r in results:
        err = np.array(r['distance_error_per_timestep'])
        timesteps = np.arange(len(err))
        # Shorter series get a higher zorder so they are drawn on top.
        z = max_len_rollout - len(err)
        plt.plot(
            timesteps,
            err,
            label=f"{r['tag']} (T={len(err)})",
            color=color_map[r['tag']],
            zorder=z,
        )
    plt.xlabel('timestep', fontsize=LABEL_FONTSIZE)
    plt.ylabel('mean distance error', fontsize=LABEL_FONTSIZE)
    plt.title('Rollout distance error per timestep (single trajectory)', fontsize=TITLE_FONTSIZE)
    plt.grid(True, alpha=0.3)
    plt.tick_params(labelsize=TICK_FONTSIZE)
    plt.legend(fontsize=LEGEND_FONTSIZE)
    plt.tight_layout()

    plot_path = output_root / 'distance_error_comparison.png'
    plt.savefig(plot_path, dpi=150)
    plt.show()
    print('プロットを保存:', plot_path)

    # -------- One-step error (teacher-forced one-step, single trajectory) --------
    plot_items = []
    for r in results:
        err = np.array(r.get('one_step_error_per_timestep') or [])
        # Models without one-step data are left out of this plot.
        if err.size == 0:
            continue
        if MAX_PLOT_STEPS is not None:
            err = err[:MAX_PLOT_STEPS]
        # Keep the order of results so the series line up with the rollout plot.
        plot_items.append((len(err), err, r))

    if plot_items:
        plt.figure(figsize=(10, 6))
        max_len_onestep = max(len(item[1]) for item in plot_items)
        for _, err, r in plot_items:
            timesteps = np.arange(len(err))
            # Same stacking rule as above: shorter series on top.
            z = max_len_onestep - len(err)
            plt.plot(
                timesteps,
                err,
                marker='o',
                markersize=3,
                linewidth=1.2,
                label=f"{r['tag']} (T={len(err)})",
                color=color_map[r['tag']],
                zorder=z,
            )
        plt.xlabel('timestep', fontsize=LABEL_FONTSIZE)
        plt.ylabel('mean distance error (1-step, teacher forcing)', fontsize=LABEL_FONTSIZE)
        max_label = 'all' if MAX_PLOT_STEPS is None else MAX_PLOT_STEPS
        plt.title(f'1-step error per timestep (up to {max_label})', fontsize=TITLE_FONTSIZE)
        plt.grid(True, alpha=0.3)
        plt.tick_params(labelsize=TICK_FONTSIZE)
        plt.legend(fontsize=LEGEND_FONTSIZE)
        plt.tight_layout()

        plot_path_step = output_root / 'distance_error_per_timestep.png'
        plt.savefig(plot_path_step, dpi=150)
        plt.show()
        print('1ステップ誤差プロットを保存:', plot_path_step)
    else:
        print('one-step 誤差を描画できるデータがありません。')

    # -------- Summary --------
    print('サマリ（平均距離誤差と長さ: いずれも1軌跡のみ）')
    for r in results:
        err = np.array(r['distance_error_per_timestep'])
        mean_full = float(err.mean())
        # Mean over at most the first MAX_SUMMARY_STEPS timesteps.
        # NOTE(review): the printed label hard-codes "100"; keep it in sync if the constant changes.
        mean_100 = float(err[: min(MAX_SUMMARY_STEPS, len(err))].mean())
        print(f"- {r['tag']}: rollout steps={len(err)}, mean_full={mean_full:.6f}, mean@<=100={mean_100:.6f}")

        err1 = np.array(r.get('one_step_error_per_timestep') or [])
        if err1.size:
            mean_full1 = float(err1.mean())
            mean_1001 = float(err1[: min(MAX_SUMMARY_STEPS, len(err1))].mean())
            print(
                f"    1-step: steps={len(err1)}, mean_full={mean_full1:.6f}, mean@<=100={mean_1001:.6f}"
            )


In [None]:

import pickle
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

DT = 0.006  # simulation time step of the dam-break-left series
# Only results whose dataset_root contains this name are plotted.
TARGET_NAME = 'dam-break-left-800'

if not results:
    print('results が空です。前のセルを実行してください。')
else:
    target = [r for r in results if TARGET_NAME in str(r.get('dataset_root', ''))]
    if not target:
        print(f"{TARGET_NAME} を含むデータセットの結果が見つかりません。先に FORCE を有効化するか、対象モデルだけ実行してください。")
    else:
        # One colour per model; ground truth and prediction share it and differ by line style.
        color_cycle = plt.rcParams['axes.prop_cycle'].by_key().get('color', []) or list(plt.cm.tab10.colors)
        color_map = {r['tag']: color_cycle[i % len(color_cycle)] for i, r in enumerate(target)}

        def _load_rms(pkl_path: Path):
            """Load one rollout pickle and return per-timestep RMS of velocity and acceleration.

            Velocity and acceleration are first and second finite differences
            of the positions along axis 0, each divided by DT.

            Returns:
                ``(v_rms_pred, v_rms_gt, a_rms_pred, a_rms_gt)``.
            """
            # NOTE(review): pickle.load can execute arbitrary code; presumably these
            # files are the ones written by this notebook's own rollout run — confirm.
            with pkl_path.open('rb') as f:
                data = pickle.load(f)
            pred = data['predicted_rollout']
            gt = data['ground_truth_rollout']
            # 4-D arrays are reduced to 3-D by taking index 0 along axis 1
            # (presumably a batch/example axis — TODO confirm).
            if pred.ndim == 4:
                pred = pred[:, 0]
            if gt.ndim == 4:
                gt = gt[:, 0]
            v_pred = np.diff(pred, axis=0) / DT
            v_gt = np.diff(gt, axis=0) / DT
            a_pred = np.diff(v_pred, axis=0) / DT
            a_gt = np.diff(v_gt, axis=0) / DT
            # RMS over axes 1 and 2, leaving one value per step along axis 0.
            v_rms_pred = np.sqrt((v_pred ** 2).mean(axis=(1, 2)))
            v_rms_gt = np.sqrt((v_gt ** 2).mean(axis=(1, 2)))
            a_rms_pred = np.sqrt((a_pred ** 2).mean(axis=(1, 2)))
            a_rms_gt = np.sqrt((a_gt ** 2).mean(axis=(1, 2)))
            return v_rms_pred, v_rms_gt, a_rms_pred, a_rms_gt

        # Top panel: velocity RMS; bottom panel: acceleration RMS.
        fig, axes = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
        for r in target:
            pdir = Path(r['pkl_dir'])
            pkls = sorted(pdir.glob('*_ex*.pkl'))
            if not pkls:
                print('pkl が見つかりません:', pdir)
                continue
            # Only the first pickle (first example) of each model is used.
            v_pred, v_gt, a_pred, a_gt = _load_rms(pkls[0])
            t_v = np.arange(len(v_pred))
            t_a = np.arange(len(a_pred))
            color = color_map[r['tag']]
            # Dashed = ground truth, solid = prediction.
            axes[0].plot(t_v, v_gt, linestyle='--', color=color, alpha=0.8, label=f"{r['tag']} GT")
            axes[0].plot(t_v, v_pred, linestyle='-', color=color, label=f"{r['tag']} Pred")
            axes[1].plot(t_a, a_gt, linestyle='--', color=color, alpha=0.8, label=f"{r['tag']} GT")
            axes[1].plot(t_a, a_pred, linestyle='-', color=color, label=f"{r['tag']} Pred")

        axes[0].set_ylabel('velocity RMS')
        axes[0].set_title(f'Velocity RMS over time ({TARGET_NAME})')
        axes[0].grid(True, alpha=0.3)
        axes[1].set_xlabel('timestep')
        axes[1].set_ylabel('acceleration RMS')
        axes[1].set_title(f'Acceleration RMS over time ({TARGET_NAME})')
        axes[1].grid(True, alpha=0.3)

        handles, labels = axes[0].get_legend_handles_labels()
        axes[0].legend(handles, labels, loc='upper right')
        handles, labels = axes[1].get_legend_handles_labels()
        axes[1].legend(handles, labels, loc='upper right')

        fig.tight_layout()
        out_path = output_root / f'rms_{TARGET_NAME}.png'
        fig.savefig(out_path, dpi=150)
        plt.show()
        print('RMS プロットを保存:', out_path)

