# 方策評価ノートブック

不確実性パラメタ ω を10等分した各点で、各手法（M2TD3, DR(TD3), RARL）の方策を30エピソード評価し、平均・標準誤差・最悪平均を可視化します。

参考: [Tanabe et al., 2022](https://arxiv.org/pdf/2211.03413) の図5・図6


In [None]:
# 1. 設定
import os
import sys
import glob
import importlib.util
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import numpy as np
import matplotlib.pyplot as plt
import gymnasium as gym
import torch
import rrls  # RRLS環境の登録

# Experiment settings
env_name = "HalfCheetah"  # "Ant", "HalfCheetah", "Hopper", "InvertedPendulum", "Walker", "Walker2d"
nb_dim = 1  # number of uncertainty dimensions: 1, 2, or 3
nb_mesh = 10  # number of grid points per uncertainty dimension
seeds = 30  # number of evaluation episodes per omega value
max_steps = 1000  # maximum number of steps per episode
device = "cpu"

# Path to TCRMDP/src (used when building M2TD3 agents).
# The repository root defaults to the current working directory, so run the
# notebook from the repository root or set the WORKSPACE_ROOT environment
# variable to override it.
workspace_root = Path(os.environ["WORKSPACE_ROOT"]) if os.environ.get("WORKSPACE_ROOT") else Path.cwd()
tcrmdp_src = str(workspace_root / "TCRMDP" / "src")

# Policy checkpoint locations per method (glob patterns allowed, several per method).
# Relative patterns are resolved against workspace_root.
method_to_globs: Dict[str, List[str]] = {
    "M2TD3": ["exp/HC/HC_dim1_M2TD3_seed*/**/policies/policy-*.pth"],
    "DR": ["exp/HC/HC_dim1_DR_seed*/agent.pth"],
    "soft-omega-M2TD3": ["exp/HC/HC_dim1_soft-omega-M2TD3_seed*/**/policies/policy-*.pth"],
    # Add further methods / glob patterns for trained policies here as needed.
}

# Path of the aggregated result cache, relative to workspace_root (None disables it)
cache_path = None  # e.g. "cache/evaluation_results.npz"

print(f"環境: {env_name}, 次元: {nb_dim}D, メッシュ: {nb_mesh}, エピソード数: {seeds}, デバイス: {device}")



In [None]:
# 2. ユーティリティ読み込み（scriptsから関数を動的インポート）

def load_module_from_file(filepath: str, module_name: str):
    """Load a Python source file as a module object.

    The file's parent directory is put on ``sys.path`` (once) before the
    module body runs, so the script can import modules that live next to it.

    Args:
        filepath: Path of the ``.py`` file to execute.
        module_name: Name given to the module spec.

    Returns:
        The executed module object. It is not registered in ``sys.modules``.

    Raises:
        ImportError: If no import spec/loader can be created for ``filepath``.
    """
    spec = importlib.util.spec_from_file_location(module_name, filepath)
    if spec is None or spec.loader is None:
        raise ImportError(f"Failed to load module from {filepath}")
    module = importlib.util.module_from_spec(spec)
    # Guard against duplicates: re-running the cell would otherwise grow
    # sys.path by one identical entry per call.
    parent_dir = str(Path(filepath).parent)
    if parent_dir not in sys.path:
        sys.path.insert(0, parent_dir)
    spec.loader.exec_module(module)
    return module

# Location of the evaluation helper scripts
scripts_dir = workspace_root / "scripts"
eval_m2td3_path = str(scripts_dir / "rrls_light_eval.py")
eval_td3_path = str(scripts_dir / "rrls_light_eval_td3.py")

# Execute both helper scripts as modules
eval_m2td3 = load_module_from_file(eval_m2td3_path, "rrls_light_eval")
eval_td3 = load_module_from_file(eval_td3_path, "rrls_light_eval_td3")

# Pull the helper functions used by the rest of the notebook into the
# notebook namespace (the two build_agent functions get distinct names).
rrls_components = eval_m2td3.rrls_components
rollout_return = eval_m2td3.rollout_return
build_agent_m2td3 = eval_m2td3.build_agent
build_agent_td3 = eval_td3.build_agent
env_id_from_name = eval_m2td3.env_id_from_name
resolve_policy_path = eval_m2td3.resolve_policy_path


# --- M2SAC 用の軽量ビルド関数 -----------------------------
# M2SAC の policy-*.pth（GaussianPolicyNetwork の state_dict）を
# 評価用に読み込むためのラッパをノートブック側に実装する。

import importlib.util as _importlib_util
from types import ModuleType as _ModuleType


class _M2SACAgentWrapper:
    """Minimal agent facade that exposes ``select_action`` on top of a policy network."""

    def __init__(self, policy: torch.nn.Module, device: torch.device, obs_dim: int):
        self.obs_dim = obs_dim
        self.device = device
        # Move the network to the target device and switch it to eval mode.
        self.policy = policy.to(device)
        self.policy.eval()

    @torch.no_grad()
    def select_action(self, obs: np.ndarray, use_random: bool = False) -> np.ndarray:
        """Run the policy on ``obs`` and return its output as a NumPy array.

        ``obs`` is converted to float32 and reshaped to ``(-1, obs_dim)``;
        dimension 0 of the network output is squeezed before conversion.
        ``use_random`` is accepted but not used.
        """
        batch = torch.tensor(obs, dtype=torch.float32, device=self.device)
        batch = batch.view(-1, self.obs_dim)
        output = self.policy(batch)
        squeezed = output.squeeze(0)
        return squeezed.detach().cpu().numpy()


def _infer_m2sac_arch(path: str) -> tuple[int, int]:
    """Infer the hidden-layer count and width from a saved policy state_dict.

    Args:
        path: Checkpoint file readable by ``torch.load`` that contains a
            mapping of parameter names to tensors.

    Returns:
        ``(hidden_num, hidden_layer)``: ``hidden_layer`` is the first
        dimension of ``input_layer.weight`` (256 if that key is absent) and
        ``hidden_num`` is one more than the largest ``<idx>`` found in keys
        of the form ``hidden_layers.<idx>.weight`` (0 if there are none).
    """
    sd = torch.load(path, map_location="cpu")

    # Width = out_features of the input layer.
    if "input_layer.weight" in sd:
        hidden_layer = int(sd["input_layer.weight"].shape[0])
    else:
        hidden_layer = 256

    prefix = "hidden_layers."
    suffix = ".weight"
    max_idx = -1
    for key in sd:
        if not (key.startswith(prefix) and key.endswith(suffix)):
            continue
        index_text = key[len(prefix):].split(".")[0]
        try:
            max_idx = max(max_idx, int(index_text))
        except ValueError:
            # Non-numeric segment (e.g. "hidden_layers.weight"): not an indexed layer.
            continue

    # max_idx never drops below -1, so this yields 0 when no layer was found.
    hidden_num = max_idx + 1
    return hidden_num, hidden_layer


def build_agent_m2sac(policy_path: str, env: gym.Env, device: str, tcrmdp_src: str):
    """Build an evaluation agent from an M2SAC ``policy-*.pth`` checkpoint.

    ``GaussianPolicyNetwork`` is loaded from ``<tcrmdp_src>/m2td3/utils.py``,
    sized from ``env``'s spaces and from the architecture inferred from the
    checkpoint, then populated with the checkpoint's state_dict.

    Args:
        policy_path: Path of the saved policy state_dict.
        env: Environment whose observation/action spaces size the network.
        device: Requested torch device; used only when CUDA is available,
            otherwise the CPU is used.
        tcrmdp_src: Directory that contains the ``m2td3`` package sources.

    Returns:
        A ``_M2SACAgentWrapper`` around the loaded policy.

    Raises:
        ValueError: If ``tcrmdp_src`` is None.
        FileNotFoundError: If ``m2td3/utils.py`` does not exist.
        ImportError: If no import spec/loader can be created for it.
    """
    if tcrmdp_src is None:
        raise ValueError("tcrmdp_src must be provided to locate m2td3/utils.py")

    src_root = os.path.expandvars(tcrmdp_src)
    src_root = os.path.expanduser(src_root)
    utils_path = os.path.join(src_root, "m2td3", "utils.py")
    if not os.path.exists(utils_path):
        raise FileNotFoundError(f"m2td3/utils.py not found: {utils_path}")

    spec = _importlib_util.spec_from_file_location("m2td3_utils_for_m2sac", utils_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Failed to load module from {utils_path}")
    utils_module: _ModuleType = _importlib_util.module_from_spec(spec)
    # The source root must be importable before utils.py is executed.
    if src_root not in sys.path:
        sys.path.insert(0, src_root)
    spec.loader.exec_module(utils_module)  # type: ignore[attr-defined]
    policy_cls = utils_module.GaussianPolicyNetwork

    obs_dim = int(np.prod(env.observation_space.shape))
    act_dim = int(np.prod(env.action_space.shape))
    hidden_num, hidden_layer = _infer_m2sac_arch(policy_path)

    if torch.cuda.is_available():
        device_t = torch.device(device)
    else:
        device_t = torch.device("cpu")

    network_kwargs = dict(
        state_dim=obs_dim,
        action_dim=act_dim,
        hidden_num=hidden_num,
        hidden_layer=hidden_layer,
        max_action=env.action_space.high[0],
        device=device_t,
    )
    policy = policy_cls(**network_kwargs).to(device_t)

    # Weights are read onto the CPU first, then moved with the module.
    policy.load_state_dict(torch.load(policy_path, map_location="cpu"))
    policy.to(device_t)

    return _M2SACAgentWrapper(policy=policy, device=device_t, obs_dim=obs_dim)


# --- Ablation(TD3) 用: actor の state_dict だけを保存した policy-*.pth を読む ----
class _TD3ActorOnlyWrapper:
    """Minimal agent facade that exposes ``select_action`` on top of a bare actor network."""

    def __init__(self, actor: torch.nn.Module, device: torch.device, obs_dim: int):
        self.obs_dim = obs_dim
        self.device = device
        # Move the network to the target device and switch it to eval mode.
        self.actor = actor.to(device)
        self.actor.eval()

    @torch.no_grad()
    def select_action(self, obs: np.ndarray, use_random: bool = False) -> np.ndarray:
        """Run the actor on ``obs`` and return its output as a NumPy array.

        ``obs`` is converted to float32 and reshaped to ``(-1, obs_dim)``;
        dimension 0 of the actor output is squeezed before conversion.
        ``use_random`` is accepted but not used.
        """
        batch = torch.tensor(obs, dtype=torch.float32, device=self.device)
        batch = batch.view(-1, self.obs_dim)
        output = self.actor(batch)
        squeezed = output.squeeze(0)
        return squeezed.detach().cpu().numpy()


def build_agent_td3_actor_only(policy_path: str, env: gym.Env, device: str, tcrmdp_src: str):
    """Build an evaluation agent from an ablation-TD3 actor checkpoint.

    ``Actor`` is loaded from ``<tcrmdp_src>/td3/models.py`` and populated
    from ``policy_path``. The checkpoint may be the actor state_dict itself
    or a dict that stores it under the ``"actor"`` key.

    Args:
        policy_path: Path of the saved checkpoint.
        env: Environment whose spaces size the actor.
        device: Requested torch device; used only when CUDA is available,
            otherwise the CPU is used.
        tcrmdp_src: Directory that contains the ``td3`` package sources.

    Returns:
        A ``_TD3ActorOnlyWrapper`` around the loaded actor.

    Raises:
        ValueError: If ``tcrmdp_src`` is None.
        FileNotFoundError: If ``td3/models.py`` does not exist.
        ImportError: If no import spec/loader can be created for it.
    """
    if tcrmdp_src is None:
        raise ValueError("tcrmdp_src must be provided to locate td3/models.py")

    src_root = os.path.expandvars(tcrmdp_src)
    src_root = os.path.expanduser(src_root)
    models_path = os.path.join(src_root, "td3", "models.py")
    if not os.path.exists(models_path):
        raise FileNotFoundError(f"td3/models.py not found: {models_path}")

    spec = _importlib_util.spec_from_file_location("td3_models_for_ablation", models_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Failed to load module from {models_path}")
    models_module: _ModuleType = _importlib_util.module_from_spec(spec)
    # The source root must be importable before models.py is executed.
    if src_root not in sys.path:
        sys.path.insert(0, src_root)
    spec.loader.exec_module(models_module)  # type: ignore[attr-defined]

    actor_cls = models_module.Actor

    obs_dim = int(np.prod(env.observation_space.shape))
    if torch.cuda.is_available():
        device_t = torch.device(device)
    else:
        device_t = torch.device("cpu")

    actor = actor_cls(observation_dim=obs_dim, action_space=env.action_space).to(device_t)

    weights = torch.load(policy_path, map_location="cpu")
    # Older checkpoints (e.g. agent.pth) keep the actor weights under "actor".
    is_bundled = isinstance(weights, dict) and "actor" in weights
    if is_bundled:
        weights = weights["actor"]
    actor.load_state_dict(weights)
    actor.to(device_t)

    return _TD3ActorOnlyWrapper(actor=actor, device=device_t, obs_dim=obs_dim)


# Ablation SAC / DR-SAC checkpoints use the same format as M2SAC
# (a GaussianPolicyNetwork state_dict), so the M2SAC builder is reused.
build_agent_ablation_sac = build_agent_m2sac
build_agent_ablation_dr_sac = build_agent_m2sac

print("✓ ユーティリティ関数の読み込み完了 (M2SAC + Ablation対応)")



In [None]:
# フォールバック付き rrls_components を定義し、上書き
def rrls_components(env_name: str, nb_dim: int):
    """Resolve the RRLS environment class and parameter bounds for ``env_name``.

    Several module and class names are tried per environment, in order,
    because they differ between RRLS layouts.

    Args:
        env_name: One of "Ant", "HalfCheetah", "Hopper", "HumanoidStandup",
            "InvertedPendulum", "Walker" or "Walker2d".
        nb_dim: 3 selects ``THREE_DIM`` bounds, 2 selects ``TWO_DIM``; any
            other value selects ``ONE_DIM``.

    Returns:
        ``(ModifiedEnv, param_bounds, generate_evaluation_set)``.

    Raises:
        ValueError: If ``env_name`` is not supported.
        AttributeError: If none of the candidate class names exists in the
            imported module.
        Exception: The last import error if no candidate module can be imported.
    """
    import importlib
    from rrls.evaluate import generate_evaluation_set

    def pick_module(candidates):
        # Try each module name in order; re-raise the last failure if none import.
        last_err = None
        for mod_name in candidates:
            try:
                return importlib.import_module(f"rrls.envs.{mod_name}")
            except Exception as e:
                last_err = e
        raise last_err

    def first_attr(mod, names):
        # Look the names up lazily. Nesting getattr() defaults evaluates the
        # fallback first and raises when only the primary name exists.
        for attr_name in names:
            if hasattr(mod, attr_name):
                return getattr(mod, attr_name)
        raise AttributeError(
            f"module {getattr(mod, '__name__', mod)!r} has none of the attributes {list(names)}"
        )

    walker_entry = (
        ["walker2d", "walker"],
        ["Walker2dParamsBound", "WalkerParamsBound"],
        ["RobustWalker2d", "RobustWalker"],
    )
    # env_name -> (candidate modules, ParamsBound names, env class names),
    # each list ordered by preference.
    registry = {
        "Ant": (["ant"], ["AntParamsBound"], ["RobustAnt"]),
        "HalfCheetah": (
            ["halfcheetah", "half_cheetah"],
            ["HalfCheetahParamsBound"],
            ["RobustHalfCheetah"],
        ),
        "Hopper": (["hopper"], ["HopperParamsBound"], ["RobustHopper"]),
        "HumanoidStandup": (
            ["humanoidstandup", "humanoid"],
            ["HumanoidStandupParamsBound"],
            ["RobustHumanoidStandup", "RobustHumanoidStandUp"],
        ),
        "InvertedPendulum": (
            ["invertedpendulum", "pendulum"],
            ["InvertedPendulumParamsBound"],
            ["RobustInvertedPendulum"],
        ),
        "Walker": walker_entry,
        "Walker2d": walker_entry,
    }
    if env_name not in registry:
        raise ValueError(f"Unsupported env_name: {env_name}")

    module_names, bound_names, env_names = registry[env_name]
    mod = pick_module(module_names)
    ParamsBound = first_attr(mod, bound_names)
    ModifiedEnv = first_attr(mod, env_names)

    # Select the bounds that match the requested dimensionality.
    if nb_dim == 3:
        param_bounds = ParamsBound.THREE_DIM.value
    elif nb_dim == 2:
        param_bounds = ParamsBound.TWO_DIM.value
    else:
        # 1-D only: the HalfCheetah worldfriction upper bound is set to 4.0.
        # A copy is edited so the library's own bounds are left untouched.
        base = dict(ParamsBound.ONE_DIM.value)
        if env_name == "HalfCheetah" and "worldfriction" in base:
            low, _ = base["worldfriction"]
            base["worldfriction"] = [low, 4.0]
        param_bounds = base

    return ModifiedEnv, param_bounds, generate_evaluation_set

ModifiedEnv, param_bounds, generate_evaluation_set = rrls_components(env_name, nb_dim)
print("\u2713 ModifiedEnv と param_bounds を再取得")


In [None]:
# 3+. 含端linspaceで評価用環境を再生成（終点を必ず含む）
from itertools import product

# 既存の ModifiedEnv, param_bounds, nb_mesh を利用

def generate_evaluation_set_inclusive(modified_env, param_bounds: dict, nb_mesh_dim: int):
    """Instantiate one environment per point of an endpoint-inclusive parameter grid.

    Each parameter in ``param_bounds`` (name -> ``(low, high)``) is sampled
    at ``nb_mesh_dim`` evenly spaced values including both ends, and
    ``modified_env`` is called with keyword arguments for every combination
    (Cartesian product, last parameter varying fastest).

    Returns:
        List of the objects returned by ``modified_env``.
    """
    names = list(param_bounds)
    axes = []
    for bounds in param_bounds.values():
        axis = np.linspace(bounds[0], bounds[1], num=nb_mesh_dim, endpoint=True)
        axes.append(axis.tolist())
    return [
        modified_env(**{name: float(value) for name, value in zip(names, point)})
        for point in product(*axes)
    ]

# Replace the evaluation set with the endpoint-inclusive grid
env_set = generate_evaluation_set_inclusive(ModifiedEnv, param_bounds, nb_mesh)
num_envs = len(env_set)
print(f"[inclusive] 環境数: {num_envs}")

# Re-extract the omega value of every environment (assumes a 1-D sweep:
# only the first key of param_bounds is read).
omega_vals = []
for env in env_set:
    try:
        if hasattr(env, "get_params") and callable(getattr(env, "get_params")):
            params = env.get_params()
            key0 = list(param_bounds.keys())[0]
            omega_vals.append(float(params[key0]))
        else:
            # No get_params(): fall back to the running index as the x value.
            omega_vals.append(len(omega_vals))
    except Exception:
        # Any failure while reading the parameter also falls back to the index.
        omega_vals.append(len(omega_vals))

omega_vals = np.array(omega_vals)
if len(set(omega_vals)) == len(omega_vals):
    # All values distinct: sort the environments by omega so curves plot left to right.
    idx = np.argsort(omega_vals)
    omega_vals = omega_vals[idx]
    env_set = [env_set[i] for i in idx]
else:
    # Duplicate values (e.g. the first key repeats in a multi-dimensional
    # grid): use plain indices on the x axis instead.
    omega_vals = np.arange(num_envs)

print(f"[inclusive] ω値の範囲: [{omega_vals.min():.3f}, {omega_vals.max():.3f}]")
print(f"[inclusive] ω値: {omega_vals[:5]} ... {omega_vals[-5:]}")



In [None]:
# 4. 単一方策の評価関数

def evaluate_policy(
    policy_path: str,
    method: str,  # e.g. "M2TD3", "soft-actor-M2TD3", "DR", "RARL"
    env_set: List[gym.Env],
    omega_vals: np.ndarray,
    seeds: int,
    max_steps: int,
    device: str,
    tcrmdp_src: str,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Evaluate one policy on every environment in ``env_set`` for ``seeds`` episodes.

    Args:
        policy_path: Checkpoint file of the policy to evaluate.
        method: Method name; selects which agent builder loads the checkpoint.
            Names not listed in the M2TD3 / SAC / Ablation-TD3 groups below
            are loaded with the TD3 builder (DR, RARL).
        env_set: One environment per omega value.
        omega_vals: Unused here; kept for interface compatibility.
        seeds: Number of episodes per environment. Episode ``i`` is rolled
            out with ``seed=i``.
        max_steps: Step limit passed to ``rollout_return``.
        device: Torch device string passed to the agent builder.
        tcrmdp_src: Path to the TCRMDP sources passed to the agent builder.

    Returns:
        mean_per_env: Mean return per environment, length ``len(env_set)``.
        sem_per_env: Standard error of the mean per environment (sample
            standard deviation with ``ddof=1``; 0.0 when there is only one
            episode).
        returns_matrix: Per-episode returns, shape ``(seeds, len(env_set))``.

    Note:
        The base environment id is derived from the module-level
        ``env_name``, not from an argument.
    """
    print(f"  評価中: {policy_path}")

    # The base environment is passed to the agent builders; it is closed in
    # the ``finally`` below so it is released even when loading or a rollout
    # fails.
    base_env_id = env_id_from_name(env_name)
    base_env = gym.make(base_env_id)
    try:
        if method in (
            "M2TD3",
            "soft-actor-M2TD3",
            "soft-omega-M2TD3",
            "soft-omega-M2TD3-high",
            "soft-omega-M2TD3-low",
            "soft-omega-M2TD3-ex-low",
            "only-soft-omega-M2TD3",
        ):
            # The soft-actor / soft-omega variants share the M2TD3 agent implementation.
            agent = build_agent_m2td3(policy_path, base_env, device, tcrmdp_src)
        elif method in ("M2SAC", "Ablation-SAC", "Ablation-DR-SAC"):
            # These checkpoints are GaussianPolicyNetwork state_dicts (policy-*.pth).
            agent = build_agent_m2sac(policy_path, base_env, device, tcrmdp_src)
        elif method in ("Ablation-TD3",):
            # Ablation TD3 saves only the actor state_dict, so it needs its own loader.
            agent = build_agent_td3_actor_only(policy_path, base_env, device, tcrmdp_src)
        else:  # DR or RARL (both use TD3)
            agent = build_agent_td3(policy_path, base_env, device, tcrmdp_src)

        # returns_per_env[i][j] = return of episode j on environment i.
        returns_per_env = [
            [
                rollout_return(env, agent, seed=seed, max_steps=max_steps)
                for seed in range(seeds)
            ]
            for env in env_set
        ]
    finally:
        base_env.close()

    mean_per_env = np.array([np.mean(rets) for rets in returns_per_env])
    sem_per_env = np.array([
        np.std(rets, ddof=1) / np.sqrt(len(rets)) if len(rets) > 1 else 0.0
        for rets in returns_per_env
    ])

    # Transpose to (seeds, number of environments).
    returns_matrix = np.array(returns_per_env).T

    return mean_per_env, sem_per_env, returns_matrix

print("✓ 評価関数定義完了")



In [None]:
# 5. 方策パスの解決と評価実行

import pickle

# Per-policy evaluation cache, stored as a pickle under <workspace_root>/cache
_policy_cache_dir = workspace_root / "cache"
_policy_cache_path = _policy_cache_dir / "policy_eval_cache.pkl"
os.makedirs(_policy_cache_dir, exist_ok=True)

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# cache files that this notebook itself produced.
try:
    with open(_policy_cache_path, "rb") as f:
        policy_eval_cache = pickle.load(f)
    print(f"✓ ポリシー評価キャッシュを読み込み: {_policy_cache_path} (entries={len(policy_eval_cache)})")
except FileNotFoundError:
    # First run: start with an empty cache.
    policy_eval_cache = {}
    print(f"✓ ポリシー評価キャッシュ: 新規作成 ({_policy_cache_path})")
except Exception as e:
    # Unreadable/corrupt cache: warn and continue with an empty cache.
    print(f"⚠ ポリシー評価キャッシュの読み込みに失敗したため無視します: {e}")
    policy_eval_cache = {}


def _policy_cache_key(method_name: str, policy_path: str) -> str:
    """Build the cache key for one (method, policy file) pair.

    The key joins ``name=value`` fields with ``|``: the module-level
    evaluation settings (``env_name``, ``nb_dim``, ``nb_mesh``, ``seeds``,
    ``max_steps``), the method name and the absolute policy path.
    """
    fields = (
        ("env", env_name),
        ("dim", nb_dim),
        ("mesh", nb_mesh),
        ("seeds", seeds),
        ("max_steps", max_steps),
        ("method", method_name),
        ("path", os.path.abspath(policy_path)),
    )
    return "|".join(f"{name}={value}" for name, value in fields)


def get_policy_eval(policy_path: str, method_name: str):
    """Return the evaluation of one policy, using the in-memory cache when possible.

    On a cache hit the stored values are returned as fresh NumPy arrays. On a
    miss ``evaluate_policy`` is run with the module-level settings and the
    result is stored in ``policy_eval_cache`` (this function does not write
    the cache to disk).

    Returns:
        ``(mean_per_env, sem_per_env, returns_matrix)``.
    """
    key = _policy_cache_key(method_name, policy_path)

    if key not in policy_eval_cache:
        mean_per_env, sem_per_env, returns_matrix = evaluate_policy(
            policy_path=policy_path,
            method=method_name,
            env_set=env_set,
            omega_vals=omega_vals,
            seeds=seeds,
            max_steps=max_steps,
            device=device,
            tcrmdp_src=tcrmdp_src,
        )
        policy_eval_cache[key] = dict(
            mean_per_env=mean_per_env,
            sem_per_env=sem_per_env,
            returns_matrix=returns_matrix,
        )
        return mean_per_env, sem_per_env, returns_matrix

    entry = policy_eval_cache[key]
    print(f"  キャッシュ再利用: {policy_path}")
    mean_cached = np.array(entry["mean_per_env"])
    sem_cached = np.array(entry["sem_per_env"])
    returns_cached = np.array(entry["returns_matrix"])
    return (mean_cached, sem_cached, returns_cached)


def _extract_step_from_policy(path: str) -> int | None:
    """方策ファイル名からステップ数を抽出（例: policy-5000000.pth -> 5000000）"""
    base = os.path.basename(path)
    if base.startswith("policy-") and base.endswith(".pth"):
        middle = base[len("policy-") : -len(".pth")]
        if middle.isdigit():
            return int(middle)
    return None

def resolve_policy_paths(globs: List[str]) -> List[str]:
    """globパターンから方策パスを解決（最新/最大ステップを選択）"""
    resolved = []
    for pattern in globs:
        # workspace_rootを基準にした相対パスを処理
        if not os.path.isabs(pattern):
            pattern = str(workspace_root / pattern)
        
        expanded = os.path.expandvars(os.path.expanduser(pattern))
        if any(ch in expanded for ch in ["*", "?", "["]):
            candidates = glob.glob(expanded, recursive=True)
            if not candidates:
                print(f"  警告: パターンに一致するファイルなし: {expanded}")
                continue
            
            # agent.pthの場合はそのまま使用
            agent_files = [p for p in candidates if os.path.basename(p) == "agent.pth"]
            if agent_files:
                resolved.extend(agent_files)
                continue
            
            # policy-*.pthの場合は最大ステップを選択
            policy_files = [p for p in candidates if _extract_step_from_policy(p) is not None]
            if policy_files:
                # 各ディレクトリごとに最新のpolicyファイルを選択
                policy_by_dir = {}
                for p in policy_files:
                    dir_path = os.path.dirname(p)
                    step = _extract_step_from_policy(p)
                    if dir_path not in policy_by_dir:
                        policy_by_dir[dir_path] = p
                    else:
                        existing_step = _extract_step_from_policy(policy_by_dir[dir_path])
                        if step and existing_step and step > existing_step:
                            policy_by_dir[dir_path] = p
                resolved.extend(policy_by_dir.values())
            else:
                # その他の場合は最新のものを選択
                resolved.append(max(candidates, key=lambda p: os.path.getmtime(p)))
        else:
            if os.path.exists(expanded):
                resolved.append(expanded)
            else:
                print(f"  警告: ファイルが見つかりません: {expanded}")
    return resolved

# Evaluation results per method
method_results: Dict[str, Dict[str, np.ndarray]] = {}

for method_name, globs in method_to_globs.items():
    if not globs:
        print(f"{method_name}: 方策パスが指定されていません。スキップします。")
        continue
    
    print(f"\n{method_name} の評価を開始...")
    policy_paths = resolve_policy_paths(globs)
    
    if not policy_paths:
        print(f"  {method_name}: 有効な方策パスが見つかりませんでした。")
        continue
    
    print(f"  見つかった方策数: {len(policy_paths)}")
    for pp in policy_paths:
        print(f"    - {pp}")
    
    # Evaluate every policy (cached per policy)
    policy_means = []
    policy_sems = []
    policy_returns_list = []  # per-policy return matrix, one row per evaluation episode
    
    for policy_path in policy_paths:
        try:
            mean_per_env, sem_per_env, returns_matrix = get_policy_eval(
                policy_path=policy_path,
                method_name=method_name,
            )
            policy_means.append(mean_per_env)
            policy_sems.append(sem_per_env)
            policy_returns_list.append(returns_matrix)
        except Exception as e:
            # A failing policy is reported and skipped; the remaining policies
            # of the method are still evaluated.
            print(f"  エラー: {policy_path} の評価に失敗: {e}")
            import traceback
            traceback.print_exc()
            continue
    
    if not policy_means:
        print(f"  {method_name}: 評価に成功した方策がありません。")
        continue
    
    # Aggregate over the policies of this method
    policy_means = np.array(policy_means)  # shape: (num_policies, number of environments)
    policy_sems = np.array(policy_sems)
    
    # Method mean: average of the per-policy mean curves
    method_mean = np.mean(policy_means, axis=0)
    
    # Standard error
    if len(policy_means) > 1:
        # Several policies: standard error across policies
        method_sem = np.std(policy_means, axis=0, ddof=1) / np.sqrt(len(policy_means))
    else:
        # Single policy: standard error across its evaluation episodes
        method_sem = policy_sems[0]
    
    # Worst-case average (minimum of the mean curve over omega)
    worst_avg = float(np.min(method_mean))
    
    # Stack the per-episode return matrices of all policies along axis 0
    all_returns = np.concatenate(policy_returns_list, axis=0) if policy_returns_list else None
    
    # NOTE: policy_paths lists every resolved path, including any policy whose
    # evaluation failed above, so it can be longer than policy_means.
    method_results[method_name] = {
        "mean": method_mean,
        "sem": method_sem,
        "worst_avg": worst_avg,
        "all_returns": all_returns,
        "policy_means": np.array(policy_means),  # shape: (num_policies, number of environments)
        "policy_sems": np.array(policy_sems),    # shape: (num_policies, number of environments)
        "policy_paths": policy_paths,
    }
    
    print(f"  {method_name} 完了: 最悪平均 = {worst_avg:.2f}")

# Persist the per-policy cache (written once, after all methods have run)
try:
    with open(_policy_cache_path, "wb") as f:
        pickle.dump(policy_eval_cache, f)
    print(f"\n✓ ポリシー評価キャッシュを保存: {_policy_cache_path} (entries={len(policy_eval_cache)})")
except Exception as e:
    print(f"\n⚠ ポリシー評価キャッシュの保存に失敗しました: {e}")

print(f"\n✓ 全評価完了。評価された手法数: {len(method_results)}")



In [None]:
# 6. 可視化（平均・標準誤差・最悪平均）
# 縦軸範囲 (ymin, ymax) もインタラクティブに指定可能

import ipywidgets as widgets
from IPython.display import display

# One checkbox per evaluated method, all ticked initially
available_methods = list(method_results.keys())
method_checkboxes = [
    widgets.Checkbox(value=True, description=m, indent=False) for m in available_methods
]

# デフォルトの縦軸下限・上限を自動決定
def get_default_ylim():
    """Compute default y-axis limits from all evaluated methods.

    The range spans ``mean - sem`` to ``mean + sem`` over every method,
    padded by 5% of its height on both sides; the lower limit is clamped at
    0. Returns ``(0.0, 1.0)`` when there are no results.
    """
    if not method_results:
        return 0.0, 1.0

    curves = list(method_results.values())
    means = np.stack([r["mean"] for r in curves])
    sems = np.stack([r["sem"] for r in curves])

    global_min = float(np.min(means - sems))
    global_max = float(np.max(means + sems))
    padding = 0.05 * abs(global_max - global_min)
    return max(0, global_min - padding), global_max + padding

# Numeric inputs for the y-axis limits, pre-filled with the automatic range
default_ymin, default_ymax = get_default_ylim()
ymin_widget = widgets.FloatText(value=default_ymin, description="ymin", step=1.0)
ymax_widget = widgets.FloatText(value=default_ymax, description="ymax", step=1.0)

def get_selected_methods():
    """Return the descriptions (method names) of the currently ticked checkboxes."""
    chosen = []
    for box in method_checkboxes:
        if box.value:
            chosen.append(box.description)
    return chosen

def plot_results(selected_methods, ymin=None, ymax=None):
    """Plot mean ± standard error per method over omega, save the figure, and show a summary.

    Args:
        selected_methods: Method names to plot. An empty selection means
            every method in ``available_methods``.
        ymin: Lower y-axis limit; ``None`` means 0.
        ymax: Upper y-axis limit; ``None`` keeps matplotlib's automatic upper
            limit. If ``ymax <= ymin`` the upper limit becomes ``ymin + 1.0``.

    Raises:
        ValueError: If none of the requested methods has evaluation results.

    Side effects:
        Writes ``figs/omega_sweep_<env_name>_<nb_dim>D.png`` under
        ``workspace_root`` and displays the figure and a summary table.

    Note:
        A method with no entry in the local ``colors`` palette is left out of
        the figure, but still appears in the summary table.
    """
    plot_targets = list(selected_methods) if selected_methods else available_methods
    missing_methods = [m for m in plot_targets if m not in method_results]
    if missing_methods:
        print(f"警告: 評価結果に存在しない手法をスキップします: {missing_methods}")
    plot_targets = [m for m in plot_targets if m in method_results]
    if not plot_targets:
        raise ValueError("可視化対象の手法がありません。手法を1つ以上選択してください。")

    # Colour per method. Only methods listed here are drawn; comment an entry
    # out to hide that method.
    colors = {
        "DR": "red",
        "soft-actor-M2TD3": "blue",
        "soft-omega-M2TD3": "cyan",
        "soft-omega-M2TD3-high": "teal",
        "soft-omega-M2TD3-low": "purple",
        "soft-omega-M2TD3-ex-low": "slateblue",
        "only-soft-omega-M2TD3": "deepskyblue",
        "M2SAC": "magenta",
        "M2TD3": "green",
        "RARL": "pink",

        # Ablation baselines
        "Ablation-SAC": "orange",
        "Ablation-DR-SAC": "gray",
        "Ablation-TD3": "brown",
    }
    # Legend/table label per method; methods not listed use their own name.
    label_map = {
        "soft-actor-M2TD3": "M2TD3 + SW(PI)",
        "soft-omega-M2TD3": "M2TD3 + SW(PI, EI)",
        "only-soft-omega-M2TD3": "M2TD3 + SW(EI)",
        "soft-omega-M2TD3-high": "α=4000",
        "soft-omega-M2TD3-low": "α=20",
        "soft-omega-M2TD3-ex-low": "α=1",
        "M2SAC": "M2SAC",
        "DR": "DR(TD3)",
        "RARL": "RARL(TD3)",

        # Ablation baselines
        "Ablation-SAC": "SAC",
        "Ablation-DR-SAC": "DR(SAC)",
        "Ablation-TD3": "TD3",
    }

    fig, ax = plt.subplots(figsize=(10, 6))

    for method_name in plot_targets:
        results = method_results[method_name]
        mean = results["mean"]
        sem = results["sem"]
        worst_avg = results["worst_avg"]
        color = colors.get(method_name)
        if color is None:
            print(f"スキップ: colors で色を設定していない手法: {method_name}")
            continue

        legend_label = label_map.get(method_name, method_name)
        ax.plot(omega_vals, mean, label=legend_label, color=color, linewidth=2)

        # Shaded band: mean ± standard error
        ax.fill_between(
            omega_vals,
            mean - sem,
            mean + sem,
            alpha=0.3,
            color=color,
        )
        # Dashed horizontal line: worst-case average of this method
        ax.axhline(
            y=worst_avg,
            color=color,
            linestyle="--",
            linewidth=1.5,
            alpha=0.7,
        )

    ax.set_xlabel("Uncertainty parameter", fontsize=16)
    ax.set_ylabel("Cumulative reward", fontsize=16)
    ax.legend(fontsize=14)
    ax.tick_params(axis="both", labelsize=12)
    ax.grid(True, alpha=0.3)

    # Apply the (interactively chosen) y-axis limits
    ymin_ = ymin if ymin is not None else 0
    ymax_ = ymax if ymax is not None else ax.get_ylim()[1]
    if ymax_ <= ymin_:
        # Guard against an empty or inverted range.
        ymax_ = ymin_ + 1.0
    ax.set_ylim(bottom=ymin_, top=ymax_)

    plt.tight_layout()

    os.makedirs(workspace_root / "figs", exist_ok=True)
    output_path = workspace_root / "figs" / f"omega_sweep_{env_name}_{nb_dim}D.png"
    plt.savefig(output_path, dpi=300, bbox_inches="tight")
    print(f"✓ 図を保存: {output_path}")

    plt.show()

    # --- Summary below the figure: average over all omega points ± the
    # --- average of the per-point standard errors, and the worst-case
    # --- average (the dashed line)
    summary_rows = []
    for method_name in plot_targets:
        results = method_results[method_name]
        mean = np.asarray(results["mean"], dtype=float)
        sem = np.asarray(results["sem"], dtype=float)

        worst_avg = float(results.get("worst_avg", np.min(mean)))
        avg_all_points = float(np.mean(mean))
        avg_sem_all_points = float(np.mean(sem))

        summary_rows.append(
            {
                "手法": label_map.get(method_name, method_name),
                "全点平均 ± 標準誤差": f"{avg_all_points:.2f} ± {avg_sem_all_points:.2f}",
                "最悪平均(破線)": worst_avg,
            }
        )

    try:
        import pandas as pd

        df = pd.DataFrame(summary_rows)
        # Round the worst-case column for display
        df["最悪平均(破線)"] = df["最悪平均(破線)"].map(lambda x: float(np.round(x, 2)))
        display(df)
    except Exception:
        # pandas unavailable (or table rendering failed): print plain text instead.
        print("\n--- 集計（全点平均 ± 標準誤差 / 最悪平均=破線）---")
        for r in summary_rows:
            print(
                f"{r['手法']}: 全点平均 ± 標準誤差={r['全点平均 ± 標準誤差']}, "
                f"最悪平均(破線)={r['最悪平均(破線)']:.2f}"
            )

def update_plot(*args):
    """Redraw the figure inside ``output_area`` from the current widget state.

    Positional arguments (e.g. a widget change notification) are ignored.
    A failure while plotting is reported as a message instead of raising.
    """
    with output_area:
        output_area.clear_output()
        try:
            selected = get_selected_methods()
            plot_results(selected, ymin=ymin_widget.value, ymax=ymax_widget.value)
        except Exception as err:
            print(f"プロット失敗: {err}")

output_area = widgets.Output()

# Redraw whenever a checkbox or a y-limit changes. names="value" already
# restricts the notifications to value changes, and update_plot ignores the
# change payload, so it can be registered directly as the observer.
for cb in method_checkboxes:
    cb.observe(update_plot, names="value")
ymin_widget.observe(update_plot, names="value")
ymax_widget.observe(update_plot, names="value")

# Initial draw
with output_area:
    plot_results(get_selected_methods(), ymin=ymin_widget.value, ymax=ymax_widget.value)

# widgets.HTML is used for the heading because widgets.Label shows markup
# such as <b>...</b> literally instead of rendering it.
controls = widgets.VBox([
    widgets.HBox([widgets.HTML("<b>手法</b>", layout=widgets.Layout(width="70px")), ymin_widget, ymax_widget]),
    *method_checkboxes
])

display(widgets.VBox([controls, output_area]))



In [None]:
# Per-method colours for the per-seed plots below (used for the aggregate
# figure's legend). NOTE: this module-level palette is separate from the one
# defined inside plot_results, and some entries differ between the two
# (e.g. "M2SAC", "RARL", "soft-omega-M2TD3-low").
colors = {
    "M2TD3": "green",
    "soft-actor-M2TD3": "blue",  # separate entry, but treated as part of the M2TD3 family
    "soft-omega-M2TD3": "cyan",  # treated as part of the M2TD3 family
    "soft-omega-M2TD3-high": "teal",  # colour for soft-omega-M2TD3-high
    "soft-omega-M2TD3-low": "darkcyan",  # colour for soft-omega-M2TD3-low
    "soft-omega-M2TD3-ex-low": "slateblue",  # colour for soft-omega-M2TD3-ex-low
    "M2SAC": "orange",           # colour for M2SAC
    "DR": "red",
    "RARL": "purple",
}

# Restrict the target methods to those that actually have results
target_methods = [m for m in ["M2TD3", "soft-actor-M2TD3", "soft-omega-M2TD3", "soft-omega-M2TD3-high", "soft-omega-M2TD3-low", "soft-omega-M2TD3-ex-low", "M2SAC", "DR", "RARL"] if m in method_results]
policy_cmap = plt.get_cmap('tab10')


def ensure_policy_means(method_name: str, results: dict) -> Optional[np.ndarray]:
    """Return the per-policy mean curves of a method, recomputing them if absent.

    If ``results`` has no ``"policy_means"``, every path in
    ``results["policy_paths"]`` is evaluated through ``get_policy_eval`` and
    the stacked curves are stored back into ``results``.

    Returns:
        The per-policy mean curves, or None when they are missing and cannot
        be recomputed (no paths, or every evaluation failed).
    """
    existing = results.get("policy_means")
    if existing is not None:
        return existing

    paths = results.get("policy_paths", [])
    if not paths:
        print(f"{method_name}: policy_meansが見つからず、policy_pathsもありません")
        return None

    print(f"{method_name}: policy_meansが見つからないため再計算します...")
    curves = []
    for path in paths:
        try:
            curve, _sem, _returns = get_policy_eval(
                policy_path=path,
                method_name=method_name,
            )
        except Exception as err:
            print(f"  再計算失敗: {path}: {err}")
        else:
            curves.append(curve)

    if not curves:
        print(f"{method_name}: policy_meansの再計算に失敗しました")
        return None

    stacked = np.array(curves)
    results["policy_means"] = stacked  # keep for later calls
    return stacked


# For each method, overlay the mean curve of every training seed (= policy
# path) in a separate figure.
# Make sure the output directory exists, so this cell does not depend on the
# summary plot cell having created it.
os.makedirs(workspace_root / "figs", exist_ok=True)

for method_name in target_methods:
    results = method_results.get(method_name)
    if results is None:
        continue

    policy_means = ensure_policy_means(method_name, results)
    if policy_means is None:
        continue

    fig, ax = plt.subplots(figsize=(10, 6))

    # Within one method, each seed gets its own colour (cycling through 10).
    for idx, mean_curve in enumerate(policy_means):
        seed_color = policy_cmap(idx % 10)
        ax.plot(omega_vals, mean_curve, label=f"Seed {idx+1}", alpha=0.8, color=seed_color)
    ax.set_xlabel("Uncertainty parameter", fontsize=16)
    ax.set_ylabel("Return", fontsize=16)
    ax.set_title(f"Policy Seeds: {method_name} ({env_name}, {nb_dim}D)", fontsize=11)
    ax.legend(fontsize=12)
    ax.tick_params(axis="both", labelsize=12)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()

    # Save one image per method
    plot_path = workspace_root / "figs" / f"policy_seeds_{method_name}_{env_name}_{nb_dim}D.png"
    plt.savefig(plot_path, dpi=300, bbox_inches="tight")
    print(f"✓ {method_name} の図を保存: {plot_path}")
    plt.show()

# Aggregate every method and every policy (training seed) into one figure
fig, ax = plt.subplots(figsize=(10, 6))

# All seed curves of one method share the same colour and opacity
for i, method_name in enumerate(target_methods):
    results = method_results.get(method_name)
    if results is None:
        continue

    policy_means = ensure_policy_means(method_name, results)
    if policy_means is None:
        continue

    color = colors.get(method_name, policy_cmap(i % 10))  # fixed colour per method
    for idx, mean_curve in enumerate(policy_means):
        # Only the first curve of a method carries the legend label, so each
        # method appears once in the legend.
        label = method_name if idx == 0 else None
        ax.plot(
            omega_vals,
            mean_curve,
            color=color,
            linewidth=2,
            alpha=0.7,
            label=label
        )

ax.set_xlabel("Uncertainty parameter", fontsize=16)
ax.set_ylabel("Return", fontsize=16)
ax.set_title(f"Policy Evaluation by Seed: {env_name} ({nb_dim}D)", fontsize=11)
ax.tick_params(axis="both", labelsize=12)
# Keep one legend entry per method
handles, labels = ax.get_legend_handles_labels()
from collections import OrderedDict
by_label = OrderedDict()
for handle, text in zip(handles, labels):
    if text is not None and text not in by_label:
        by_label[text] = handle
ax.legend(by_label.values(), by_label.keys(), fontsize=14)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()



In [None]:
# 7. オプション: 結果のキャッシュ保存・読み込み

if cache_path:
    cache_file = workspace_root / cache_path
    # np.savez_compressed appends ".npz" when the file name does not already
    # end with it. Apply the same rule up front so that the directory we
    # create and the path we report match the file that is actually written.
    if not cache_file.name.endswith(".npz"):
        cache_file = cache_file.with_name(cache_file.name + ".npz")
    os.makedirs(cache_file.parent, exist_ok=True)

    # Save: ndarray values inside each method's result dict are converted to
    # plain lists; every other value is stored unchanged.
    cache_data = {
        "omega_vals": omega_vals,
        "method_results": {k: {kk: vv.tolist() if isinstance(vv, np.ndarray) else vv
                               for kk, vv in v.items()}
                           for k, v in method_results.items()},
        "config": {
            "env_name": env_name,
            "nb_dim": nb_dim,
            "nb_mesh": nb_mesh,
            "seeds": seeds,
            "max_steps": max_steps,
        }
    }
    np.savez_compressed(cache_file, **cache_data)
    print(f"✓ キャッシュを保存: {cache_file}")

    # Loading example (kept commented out).
    # The nested dicts come back as 0-d object arrays, so np.load needs
    # allow_pickle=True and a single .item() to recover the outer dict; the
    # inner values are already plain dicts.
    # NOTE: allow_pickle=True unpickles arbitrary objects -- only load cache
    # files that you created yourself.
    # loaded = np.load(cache_file, allow_pickle=True)
    # omega_vals = loaded["omega_vals"]
    # method_results = {k: {kk: np.array(vv) if isinstance(vv, list) else vv
    #                       for kk, vv in v.items()}
    #                  for k, v in loaded["method_results"].item().items()}
else:
    print("キャッシュは無効化されています")



In [None]:
# 8. 任意パラメタ点での方策動画生成
import imageio.v2 as imageio
from datetime import datetime

# --- 描画で床が途切れる対策（学習環境の物理は変えない） ----------------------
# MuJoCo の plane は「接触判定としては（多くの場合）無限」ですが、
# “見た目”は geom_size / zfar 等で有限に見えることがあります。
# 動画生成時だけ、床の描画範囲（plane geom の size）と zfar を大きくして
# 「床が途中で消えて空中に飛び出したように見える」を防ぎます。
try:
    import mujoco  # mujoco>=2
except Exception:
    mujoco = None


def _widen_mujoco_visual_floor(env: gym.Env, half_length: float = 2000.0, half_width: float = 200.0, zfar_margin: float = 4.0) -> None:
    """Enlarge floor-like geoms and the far clip distance of ``env``'s MuJoCo model.

    The first two entries of ``geom_size`` are raised to at least
    ``half_length`` / ``half_width`` for every plane geom and for every geom
    whose name contains "floor", "ground", "terrain" or "track".
    ``model.vis.map.zfar`` is raised to at least ``half_length * zfar_margin``.
    ``stat.extent`` is never modified. Sizes are only ever increased.

    The function is best-effort: it returns silently when the ``mujoco``
    module is unavailable or the model cannot be reached, and any failure on
    a single geom just skips that geom.

    NOTE(review): this is intended as a rendering-only tweak; for non-plane
    geoms matched by name, a larger ``geom_size`` may also affect collision
    geometry -- confirm for the environments in use.
    """
    if mujoco is None:
        return
    try:
        model = env.unwrapped.model
    except Exception:
        return

    floor_keywords = ("floor", "ground", "terrain", "track")

    def _stretch(geom_id: int) -> None:
        # Work on a copy and write it back in one assignment.
        dims = model.geom_size[geom_id].copy()
        if dims.shape[0] < 2:
            return
        dims[0] = max(float(dims[0]), float(half_length))
        dims[1] = max(float(dims[1]), float(half_width))
        model.geom_size[geom_id] = dims

    try:
        geom_count = int(getattr(model, "ngeom", 0))
    except Exception:
        geom_count = 0

    for geom_id in range(geom_count):
        try:
            geom_kind = int(model.geom_type[geom_id])
        except Exception:
            continue

        # Case 1: plane geoms are always widened.
        try:
            if geom_kind == int(mujoco.mjtGeom.mjGEOM_PLANE):
                _stretch(geom_id)
                continue
        except Exception:
            # Fall through to the name-based check below.
            pass

        # Case 2: geoms whose name suggests they are part of the floor.
        try:
            geom_name = mujoco.mj_id2name(model, mujoco.mjtObj.mjOBJ_GEOM, geom_id)
        except Exception:
            geom_name = None
        if not geom_name:
            continue
        lowered = str(geom_name).lower()
        if any(word in lowered for word in floor_keywords):
            try:
                _stretch(geom_id)
            except Exception:
                continue

    # Push the far clipping plane out as well (extent is left alone).
    try:
        model.vis.map.zfar = max(float(model.vis.map.zfar), float(half_length) * float(zfar_margin))
    except Exception:
        pass


def _set_tracking_camera(env: gym.Env, distance: float = 6.0, elevation: float = -10.0, azimuth: float = 90.0) -> None:
    """Point the renderer camera at the agent's main body (rendering only, best-effort).

    Does nothing when ``env`` has no ``mujoco_renderer`` attribute. Otherwise
    the id of the first body named "torso", "root", "pelvis" or "torso1" is
    looked up and, together with ``distance`` / ``elevation`` / ``azimuth``,
    applied through two routes:

    * ``env.mujoco_renderer.set_camera_settings(...)`` if that call succeeds;
    * direct attribute writes on ``env.mujoco_renderer.cam`` if it exists.
      There, the camera distance is only ever increased.

    Every step swallows its own errors so that a renderer lacking one of
    these APIs never aborts video generation.
    """
    if not hasattr(env, "mujoco_renderer"):
        return

    body_names = ("torso", "root", "pelvis", "torso1")
    bodyid = None
    try:
        model = env.unwrapped.model
    except Exception:
        model = None

    if model is not None:
        # Legacy mujoco-py style lookup.
        try:
            if hasattr(model, "body_name2id"):
                for name in body_names:
                    try:
                        bodyid = int(model.body_name2id(name))
                        break
                    except Exception:
                        continue
        except Exception:
            pass

        # The official `mujoco` bindings have no `body_name2id`, so fall back
        # to mj_name2id, which reports -1 for an unknown name.
        if bodyid is None and mujoco is not None:
            for name in body_names:
                try:
                    found = int(mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, name))
                except Exception:
                    continue
                if found >= 0:
                    bodyid = found
                    break

    cam_settings = {
        "distance": float(distance),
        "elevation": float(elevation),
        "azimuth": float(azimuth),
    }
    if bodyid is not None:
        cam_settings["trackbodyid"] = bodyid

    # Route 1: renderer-level settings API, when the renderer offers one.
    try:
        env.mujoco_renderer.set_camera_settings(cam_settings)
    except Exception:
        pass

    # Route 2: write straight to the camera object as well.
    try:
        cam = getattr(env.mujoco_renderer, "cam", None)
        if cam is not None:
            if bodyid is not None and hasattr(cam, "trackbodyid"):
                cam.trackbodyid = bodyid
            if hasattr(cam, "distance"):
                # Never zoom in closer than the camera already is.
                cam.distance = max(float(cam.distance), float(distance))
            if hasattr(cam, "elevation"):
                cam.elevation = float(elevation)
            if hasattr(cam, "azimuth"):
                cam.azimuth = float(azimuth)
    except Exception:
        pass


# --- Settings begin ---------------------------------------------------------
# Pick the default policy from the first method (in the priority order below)
# that has at least one evaluated policy path.
# Both names are reset here so that a value left over from an earlier run of
# this cell can never leak into the current run.
default_policy = None
default_method = None
for candidate_method in [
    "soft-actor-M2TD3",
    "soft-omega-M2TD3",
    "soft-omega-M2TD3-high",
    "soft-omega-M2TD3-low",
    "soft-omega-M2TD3-ex-low",
    "M2SAC",
    "Ablation-SAC",
    "Ablation-DR-SAC",
    "Ablation-TD3",
    "M2TD3",
    "DR",
    "RARL",
]:
    # `or {}` also covers a method whose stored result is None.
    candidate_paths = (method_results.get(candidate_method) or {}).get("policy_paths")
    if candidate_paths:
        default_policy = candidate_paths[0]
        default_method = candidate_method
        break

video_policy_path = default_policy  # path to the trained policy file to render
# One of: "M2TD3", "M2SAC", "Ablation-SAC", "Ablation-DR-SAC", "Ablation-TD3",
# "soft-actor-M2TD3", "soft-omega-M2TD3", "soft-omega-M2TD3-high",
# "soft-omega-M2TD3-low", "soft-omega-M2TD3-ex-low", "DR", "RARL"
video_method = default_method if default_method is not None else "M2TD3"
custom_omega_points = [omega_vals[0]]  # e.g. [1.0, 15.0] or [{"length": 1.0}]
video_episode_seeds = list(range(min(3, seeds)))  # use list(range(seeds)) to match the `seeds` setting
video_max_steps = max_steps
video_fps = 30
video_output_dir = workspace_root / "figs" / "videos"
os.makedirs(video_output_dir, exist_ok=True)

# Floor extension used for rendering only (increase if the floor still ends too early)
video_floor_half_length = 2000.0
video_floor_half_width = 200.0
# --- Settings end -----------------------------------------------------------

# Refuse to continue unless the selected policy file really exists.
if not (video_policy_path and os.path.exists(video_policy_path)):
    raise FileNotFoundError("video_policy_path を存在する方策ファイルに設定してください")

# Method names grouped by the loader that reads their checkpoints.
# soft-actor-M2TD3 and the soft-omega-M2TD3 family (high/low/ex-low included)
# are loaded through the same wrapper as M2TD3.
_m2td3_style_methods = (
    "M2TD3",
    "soft-actor-M2TD3",
    "soft-omega-M2TD3",
    "soft-omega-M2TD3-high",
    "soft-omega-M2TD3-low",
    "soft-omega-M2TD3-ex-low",
)
_m2sac_style_methods = ("M2SAC", "Ablation-SAC", "Ablation-DR-SAC")

# A throw-away base environment is handed to the agent builder and closed
# again whether or not loading succeeds.
base_env_id = env_id_from_name(env_name)
base_env = gym.make(base_env_id)
try:
    if video_method in _m2td3_style_methods:
        video_agent = build_agent_m2td3(video_policy_path, base_env, device, tcrmdp_src)
    elif video_method in _m2sac_style_methods:
        video_agent = build_agent_m2sac(video_policy_path, base_env, device, tcrmdp_src)
    elif video_method == "Ablation-TD3":
        video_agent = build_agent_td3_actor_only(video_policy_path, base_env, device, tcrmdp_src)
    else:
        # Every remaining method goes through the plain TD3 loader.
        video_agent = build_agent_td3(video_policy_path, base_env, device, tcrmdp_src)
finally:
    base_env.close()

# Parameter names, in the iteration order of param_bounds.
param_keys = list(param_bounds)


def normalize_param_point(point) -> Dict[str, float]:
    """Convert a parameter point given as scalar, sequence or dict into a dict.

    Args:
        point: Either a dict mapping parameter names to values, or a scalar /
            sequence holding one value per entry of ``param_keys`` (in that
            order).

    Returns:
        A dict with exactly the keys of ``param_keys`` and float values. For a
        dict input, any missing parameter takes the first element of its
        ``param_bounds`` entry.

    Raises:
        ValueError: If a non-dict ``point`` does not contain exactly
            ``len(param_keys)`` values.

    Out-of-bounds values and unknown dict keys are reported with a printed
    warning only; they never raise.
    """
    if isinstance(point, dict):
        # A misspelled key would otherwise silently fall back to the default
        # value, so tell the user which keys are being dropped.
        unknown = [k for k in point if k not in param_keys]
        if unknown:
            print(f"⚠ 未知のパラメタ名を無視します: {unknown}")
        param_dict = {k: float(point.get(k, param_bounds[k][0])) for k in param_keys}
    else:
        arr = np.atleast_1d(point).astype(float)
        if arr.size != len(param_keys):
            raise ValueError(f"param数が一致しません: 期待={len(param_keys)} 実際={arr.size}")
        param_dict = {k: float(v) for k, v in zip(param_keys, arr)}
    # Bounds check (warning only).
    for k, v in param_dict.items():
        lo, hi = map(float, param_bounds[k])
        if not (lo <= v <= hi):
            print(f"⚠ {k}={v} が境界 [{lo}, {hi}] を超えています")
    return param_dict


def instantiate_render_env(param_dict: Dict[str, float]) -> gym.Env:
    """Build a ``ModifiedEnv`` for ``param_dict``, preferring ``rgb_array`` rendering.

    ``render_mode="rgb_array"` is tried first; when the constructor raises
    ``TypeError`` the environment is created without a render mode instead.
    The visual floor is then widened and the camera configured, both
    best-effort. The first time this function runs in the current namespace
    it also prints a few debug values about the model and the camera.
    """
    try:
        env = ModifiedEnv(**param_dict, render_mode="rgb_array")
    except TypeError:
        env = ModifiedEnv(**param_dict)

    # Widen the drawn floor / far clip and configure the camera.
    _widen_mujoco_visual_floor(env, half_length=video_floor_half_length, half_width=video_floor_half_width)
    _set_tracking_camera(env)

    # One-shot debug output: the flag's mere presence in the module globals
    # marks that the dump already happened.
    global _video_debug_done  # type: ignore
    if "_video_debug_done" in globals():
        return env
    _video_debug_done = True

    try:
        mj_model = env.unwrapped.model
        print("[debug] geom_size[0:3]", mj_model.geom_size[:3])
        print("[debug] vis.map.zfar", getattr(mj_model.vis.map, "zfar", None))
    except Exception:
        # Without model info the camera dump is skipped as well.
        return env

    try:
        viewer_cam = getattr(env.mujoco_renderer, "cam", None)
        if viewer_cam:
            print("[debug] cam trackbodyid", getattr(viewer_cam, "trackbodyid", None))
            print(
                "[debug] cam distance/elevation/azimuth",
                getattr(viewer_cam, "distance", None),
                getattr(viewer_cam, "elevation", None),
                getattr(viewer_cam, "azimuth", None),
            )
    except Exception:
        pass
    return env


def grab_frame(env: gym.Env):
    """Return one rendered frame from ``env``, or ``None`` if none could be obtained.

    ``env.render()`` is tried first. If it raises or yields ``None`` and the
    environment exposes ``mujoco_renderer``, that renderer is asked for an
    ``"rgb_array"`` frame instead. Rendering errors are swallowed.
    """
    try:
        image = env.render()
    except Exception:
        image = None
    if image is not None:
        return image

    # Fallback: ask the MuJoCo renderer directly, when there is one.
    if not hasattr(env, "mujoco_renderer"):
        return None
    try:
        return env.mujoco_renderer.render("rgb_array")
    except Exception:
        return None


def record_episode_video(env: gym.Env, agent, seed: int, max_steps: int, fps: int, output_path: Path, reset_options: Dict[str, float]):
    """Roll out one episode with ``agent`` and write the rendered frames as a video.

    Args:
        env: Environment to roll out in. It is always closed by this function,
            including when the rollout raises.
        agent: Object providing ``select_action(obs, use_random=False)``.
        seed: Seed passed to ``env.reset``.
        max_steps: Upper bound on the number of environment steps.
        fps: Frame rate passed to ``imageio.mimsave``.
        output_path: Destination file of the video.
        reset_options: Passed as ``options`` to ``env.reset``; if that call
            raises ``TypeError``, the reset is retried without options.

    Returns:
        None. When no frame could be captured, a warning is printed and no
        file is written.
    """
    frames = []
    try:
        # Inject the parameters through reset(options=...).
        # NOTE(review): presumably this mirrors how parameters are set during
        # training and overrides the constructor arguments -- confirm against
        # ModifiedEnv.
        try:
            obs, _ = env.reset(seed=seed, options=reset_options)
        except TypeError:
            obs, _ = env.reset(seed=seed)

        first = grab_frame(env)
        if first is not None:
            frames.append(first)
        done = truncated = False
        steps = 0
        while not (done or truncated) and steps < max_steps:
            action = agent.select_action(obs, use_random=False)
            obs, _, done, truncated, _ = env.step(action)
            frame = grab_frame(env)
            if frame is not None:
                frames.append(frame)
            steps += 1
    finally:
        # Release the simulator/renderer even if the rollout failed midway.
        env.close()
    if not frames:
        print(f"⚠ フレーム未取得: {output_path}")
        return
    imageio.mimsave(output_path, frames, fps=fps)
    print(f"✓ 動画保存: {output_path} (frames={len(frames)})")


# Render one video per (parameter point, episode seed) pair.
for point in custom_omega_points:
    param_dict = normalize_param_point(point)
    # File-name fragment such as "<key>-<value>", one per parameter, joined by "_".
    param_tag = "_".join([f"{k}-{param_dict[k]:.3f}" for k in param_keys])
    for seed in video_episode_seeds:
        # A fresh environment per episode; record_episode_video closes it.
        env = instantiate_render_env(param_dict)
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        video_name = f"video_{video_method}_seed{seed}_{param_tag}_{timestamp}.mp4"
        output_path = video_output_dir / video_name
        record_episode_video(
            env,
            video_agent,
            seed=seed,
            max_steps=video_max_steps,
            fps=video_fps,
            output_path=output_path,
            reset_options=param_dict,
        )


