# インタラクティブ軌道生成モデル性能評価

このNotebookでは各種軌道生成モデルの性能をインタラクティブに評価できます。

## 機能
- モデル選択（ドロップダウンメニュー）
- ハイパーパラメータ設定GUI
- 学習実行
- データ生成（条件ベクトル指定/ランダム選択）
- 軌道可視化（実軌道vs生成軌道）
- 誤差計算・評価

In [1]:
# CLAUDE_ADDED
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yaml
import torch
import ipywidgets as widgets
from IPython.display import display, clear_output
import subprocess
import glob
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Put the current directory (src) on the import path, once.
current_path = Path('.').absolute()
if sys.path.count(str(current_path)) == 0:
    sys.path.append(str(current_path))

# Report where the notebook is running and which path was registered.
print(
    f"Working directory: {os.getcwd()}",
    f"Current path added: {current_path}",
    sep="\n",
)

Working directory: /app
Current path added: /app


In [2]:
# CLAUDE_ADDED
class ModelEvaluator:
    """Keeps track of the available models and the currently loaded config and dataset."""

    def __init__(self):
        # Display name -> model directory (relative to the working directory).
        model_names = (
            'DiffWave',
            'HybridModel',
            'HybridTransformer',
            'Transformer',
            'UNet',
        )
        self.models = {name: name for name in model_names}
        self.selected_model = None
        self.config = None
        self.dataset = None

    def load_config(self, model_name):
        """Load config.yaml from the model's directory.

        Returns the parsed config (also stored on ``self.config``), or None
        when the file does not exist.
        """
        config_path = Path(self.models[model_name]) / 'config.yaml'
        if not config_path.exists():
            print(f"Config file not found: {config_path}")
            return None

        with open(config_path, 'r', encoding='utf-8') as f:
            self.config = yaml.safe_load(f)
        return self.config

    def load_dataset(self, data_path="../data/Datasets/overfitting_dataset.npz"):
        """Load the dataset file at ``data_path``.

        Returns the object produced by ``np.load`` (also stored on
        ``self.dataset``), or None when the file does not exist.
        """
        if not Path(data_path).exists():
            print(f"Dataset not found: {data_path}")
            return None

        self.dataset = np.load(data_path)
        print(f"Dataset loaded: {list(self.dataset.keys())}")
        return self.dataset

    def get_available_checkpoints(self, model_name):
        """Return the sorted, de-duplicated ``*.pth`` files found for a model.

        Both the model's own checkpoint folders and the shared ones under the
        working directory are searched.
        """
        model_dir = Path(self.models[model_name])
        search_dirs = (
            model_dir / "outputs" / "checkpoints",
            model_dir / "checkpoints",
            Path("checkpoints"),
            Path("outputs/checkpoints"),
        )

        found = set()
        for directory in search_dirs:
            found.update(glob.glob(str(directory / "*.pth")))

        return sorted(found)

# Single evaluator instance that the widget callbacks in the following cells read and update.
evaluator = ModelEvaluator()
print("ModelEvaluator initialized")

ModelEvaluator initialized


## 1. モデル選択とハイパーパラメータ設定

In [3]:
# CLAUDE_ADDED
# Model selection UI
# Dropdown offering every model registered on the evaluator; 'DiffWave' is preselected.
model_dropdown = widgets.Dropdown(
    options=list(evaluator.models.keys()),
    value='DiffWave',
    description='モデル:',
    style={'description_width': 'initial'}
)

# Output area that on_model_change clears and prints into.
config_output = widgets.Output()

def on_model_change(change):
    """Load the config of the newly selected model and rebuild its hyperparameter UI.

    All messages are written into ``config_output``.

    Args:
        change: Mapping whose 'new' entry is the selected model name (the
            dropdown's change notification, or a plain dict for the manual
            initial call).
    """
    with config_output:
        clear_output(wait=True)
        selected = change['new']
        evaluator.selected_model = selected
        config = evaluator.load_config(selected)
        if not config:
            print(f"✗ Failed to load {selected} config")
            return

        print(f"✓ {selected} config loaded")
        # create_hyperparameter_widgets is defined in a later cell, so it does
        # not exist yet when this cell makes its initial call. Resolve it at
        # call time instead of failing with a NameError.
        build_widgets = globals().get('create_hyperparameter_widgets')
        if build_widgets is None:
            print("ハイパーパラメータUIは、ウィジェット作成セルの実行後に表示されます")
        else:
            build_widgets(config)

# Re-run on_model_change whenever the dropdown's value changes.
model_dropdown.observe(on_model_change, names='value')

# Show the heading, the dropdown and the status output stacked vertically.
display(widgets.VBox([
    widgets.HTML("<h3>モデル選択</h3>"),
    model_dropdown,
    config_output
]))

# Initial setup: handle the preselected model as if it had just been chosen.
on_model_change({'new': model_dropdown.value})

VBox(children=(HTML(value='<h3>モデル選択</h3>'), Dropdown(description='モデル:', options=('DiffWave', 'HybridModel', …

In [4]:
# CLAUDE_ADDED
# Hyperparameter widget builder
# Widgets currently shown, keyed by hyperparameter name.
hyperparameter_widgets = {}
# Container whose children are replaced each time the widgets are rebuilt.
hyperparameter_container = widgets.VBox()

def create_hyperparameter_widgets(config):
    """Rebuild the hyperparameter sliders for the currently selected model.

    Empties ``hyperparameter_widgets``, creates one slider per supported
    hyperparameter (seeded from ``config``, with a fallback default) and
    installs them, followed by an "apply" button, as the children of
    ``hyperparameter_container``.

    Args:
        config: Parsed model configuration (mapping). The 'training' and
            'model' sections are optional; sliders are only created for the
            sections that are present.
    """
    # Mutated in place, so no ``global`` declaration is needed.
    hyperparameter_widgets.clear()
    slider_style = {'description_width': 'initial'}

    def add_int_slider(key, source, default, lo, hi, step, label, source_key=None):
        """Create an IntSlider seeded from ``source``, register it under ``key`` and return it."""
        raw_value = source.get(source_key or key, default)
        slider = widgets.IntSlider(
            # Coerce explicitly: a hand-edited YAML file may hold the number
            # as a float or a string.
            value=int(raw_value),
            min=lo, max=hi, step=step,
            description=label,
            style=slider_style
        )
        hyperparameter_widgets[key] = slider
        return slider

    widgets_list = [widgets.HTML("<h3>ハイパーパラメータ設定</h3>")]

    # Parameters common to every model
    if 'training' in config:
        training_config = config['training']

        widgets_list.append(
            add_int_slider('batch_size', training_config, 32, 8, 128, 8, 'Batch Size:')
        )

        learning_rate_slider = widgets.FloatLogSlider(
            # PyYAML resolves exponent notation without a decimal point
            # (e.g. ``1e-4``) to a string, so convert before handing it over.
            value=float(training_config.get('learning_rate', 1e-4)),
            base=10, min=-6, max=-2,
            description='Learning Rate:',
            style=slider_style
        )
        hyperparameter_widgets['learning_rate'] = learning_rate_slider
        widgets_list.append(learning_rate_slider)

        # Configs name the epoch count either 'epochs' or 'num_epochs'.
        epochs_key = 'epochs' if 'epochs' in training_config else 'num_epochs'
        widgets_list.append(
            add_int_slider('epochs', training_config, 100, 10, 500, 10, 'Epochs:',
                           source_key=epochs_key)
        )

    # Model-specific parameters: (key, default, min, max, step, label)
    hybrid_sliders = [
        ('lstm_hidden_dim', 128, 64, 256, 32, 'LSTM Hidden Dim:'),
        ('diffusion_hidden_dim', 256, 128, 512, 64, 'Diffusion Hidden Dim:'),
    ]
    sliders_by_model = {
        'DiffWave': [
            ('residual_channels', 64, 32, 256, 32, 'Residual Channels:'),
            ('num_layers', 20, 10, 50, 5, 'Num Layers:'),
        ],
        'Transformer': [
            ('d_model', 256, 128, 512, 64, 'D Model:'),
            ('nhead', 8, 4, 16, 2, 'Num Heads:'),
        ],
        'UNet': [
            ('base_channels', 128, 64, 256, 32, 'Base Channels:'),
        ],
    }

    if 'model' in config:
        model_config = config['model']
        selected = evaluator.selected_model

        if selected in sliders_by_model:
            slider_specs = sliders_by_model[selected]
        elif selected and 'Hybrid' in selected:
            # Covers every Hybrid* model (HybridModel, HybridTransformer).
            slider_specs = hybrid_sliders
        else:
            slider_specs = []

        for key, default, lo, hi, step, label in slider_specs:
            widgets_list.append(
                add_int_slider(key, model_config, default, lo, hi, step, label)
            )

    # Apply button: copies the slider values back into evaluator.config.
    update_config_btn = widgets.Button(
        description='設定を適用',
        button_style='success',
        icon='check'
    )

    def on_update_config(b):
        update_config_from_widgets()
        print("✓ 設定が更新されました")

    update_config_btn.on_click(on_update_config)
    widgets_list.append(update_config_btn)

    hyperparameter_container.children = widgets_list

def update_config_from_widgets():
    """Write the current widget values back into ``evaluator.config`` in place.

    Does nothing when no config is loaded. Training values go into the
    'training' section; every other widget updates the 'model' section, but
    only for keys that section already contains.
    """
    cfg = evaluator.config
    if cfg is None:
        return

    training_names = ('batch_size', 'learning_rate', 'epochs')

    # Training section
    if 'training' in cfg:
        training = cfg['training']
        for name in ('batch_size', 'learning_rate'):
            if name in hyperparameter_widgets:
                training[name] = hyperparameter_widgets[name].value
        if 'epochs' in hyperparameter_widgets:
            # Keep whichever spelling the config uses for the epoch count.
            target_key = 'epochs' if 'epochs' in training else 'num_epochs'
            training[target_key] = hyperparameter_widgets['epochs'].value

    # Model section
    if 'model' in cfg:
        model = cfg['model']
        for name, slider in hyperparameter_widgets.items():
            if name not in training_names and name in model:
                model[name] = slider.value

display(hyperparameter_container)

# The model-selection cell runs before create_hyperparameter_widgets exists,
# so the container is still empty at this point. Populate it now for the
# model whose config has already been loaded.
if evaluator.config:
    create_hyperparameter_widgets(evaluator.config)

VBox()

## 2. 学習機能

In [5]:
# CLAUDE_ADDED
# Output area that run_training clears and prints its progress into.
training_output = widgets.Output()

def run_training():
    """Train the selected model by running its ``train.py`` in a subprocess.

    The slider values are first copied into ``evaluator.config``, which is
    dumped to ``<model_dir>/temp_config.yaml`` for the duration of the run and
    removed afterwards. Progress and results are printed into
    ``training_output``; the function returns nothing.

    NOTE(review): the temporary config file is written but not passed to
    train.py on the command line. Confirm that train.py actually picks it up;
    otherwise the values chosen in the UI have no effect on training.
    """
    if evaluator.selected_model is None or evaluator.config is None:
        print("モデルまたは設定が選択されていません")
        return

    with training_output:
        clear_output(wait=True)
        print(f"🚀 {evaluator.selected_model} の学習を開始します...")

        # Pull the latest slider values into evaluator.config.
        update_config_from_widgets()

        model_dir = evaluator.models[evaluator.selected_model]
        temp_config_path = Path(model_dir) / 'temp_config.yaml'

        try:
            # Written inside the try block so that a failure here (e.g. a
            # missing model directory) is reported and cleaned up like any
            # other training error.
            with open(temp_config_path, 'w', encoding='utf-8') as f:
                yaml.dump(evaluator.config, f, default_flow_style=False, allow_unicode=True)

            train_script = Path(model_dir) / 'train.py'
            if not train_script.exists():
                print(f"✗ 学習スクリプトが見つかりません: {train_script}")
                return

            # Run with the kernel's own interpreter and an argument list
            # (no shell); the working directory replaces the former "cd".
            args = [sys.executable, 'train.py']
            print(f"実行コマンド: cd {model_dir} && {' '.join(args)}")
            print("学習中... (しばらくお待ちください)")

            result = subprocess.run(
                args,
                capture_output=True,
                text=True,
                cwd=model_dir
            )

            if result.returncode == 0:
                print("✓ 学習が完了しました！")
                if result.stdout:
                    print("出力:")
                    print(result.stdout[-1000:])  # show only the last 1000 characters
            else:
                print(f"✗ 学習中にエラーが発生しました (終了コード: {result.returncode})")
                print("エラー出力:")
                print(result.stderr)

        except Exception as e:
            print(f"✗ 学習実行中にエラーが発生しました: {str(e)}")

        finally:
            # Always remove the temporary config, including on early return.
            if temp_config_path.exists():
                temp_config_path.unlink()

# Training button
train_button = widgets.Button(
    description='学習実行',
    button_style='primary',
    icon='play'
)

# run_training takes no arguments, so the clicked button passed by on_click is dropped.
train_button.on_click(lambda b: run_training())

# Show the heading, the button and the training log stacked vertically.
display(widgets.VBox([
    widgets.HTML("<h3>学習実行</h3>"),
    train_button,
    training_output
]))

VBox(children=(HTML(value='<h3>学習実行</h3>'), Button(button_style='primary', description='学習実行', icon='play', st…

## 3. データ生成・可視化機能

In [6]:
# CLAUDE_ADDED
# Dataset loading
# Text box holding the dataset path that load_dataset_handler reads.
dataset_path_widget = widgets.Text(
    value="../data/Datasets/overfitting_dataset.npz",
    description='データセットパス:',
    style={'description_width': 'initial'}
)

# Button that triggers loading of the dataset.
load_data_button = widgets.Button(
    description='データ読み込み',
    button_style='info',
    icon='download'
)

# Output area that load_dataset_handler clears and prints into.
data_info_output = widgets.Output()

def load_dataset_handler(b):
    """Load the dataset named in the path widget and list its arrays.

    Everything is printed into ``data_info_output``.

    Args:
        b: The clicked button (or None for the manual initial call); unused.
    """
    with data_info_output:
        clear_output(wait=True)
        dataset = evaluator.load_dataset(dataset_path_widget.value)
        if dataset is None:
            print("✗ データセットの読み込みに失敗しました")
            return

        print("✓ データセット読み込み完了")
        print(f"ファイル内容: {list(dataset.keys())}")

        # Show the shape of each stored array.
        for name in dataset.keys():
            shape = dataset[name].shape
            print(f"{name}: {shape}")

# Load the dataset whenever the button is clicked.
load_data_button.on_click(load_dataset_handler)

# Show the heading, path box, button and info output stacked vertically.
display(widgets.VBox([
    widgets.HTML("<h3>データセット読み込み</h3>"),
    dataset_path_widget,
    load_data_button,
    data_info_output
]))

# Initial dataset load, using the default path in the text box.
load_dataset_handler(None)

VBox(children=(HTML(value='<h3>データセット読み込み</h3>'), Text(value='../data/Datasets/overfitting_dataset.npz', descr…

In [9]:
# CLAUDE_ADDED
# Checkpoint selection and generation
# Starts empty; refresh_checkpoints fills in the options.
checkpoint_dropdown = widgets.Dropdown(
    options=[],
    description='チェックポイント:',
    style={'description_width': 'initial'}
)

# Button that re-scans for checkpoint files.
refresh_checkpoints_btn = widgets.Button(
    description='チェックポイント更新',
    button_style='info',
    icon='refresh'
)

def refresh_checkpoints(b):
    """Reload the checkpoint dropdown with the files found for the selected model.

    Args:
        b: The clicked button; unused.
    """
    model_name = evaluator.selected_model
    if not model_name:
        print("モデルを先に選択してください")
        return

    found = evaluator.get_available_checkpoints(model_name)
    checkpoint_dropdown.options = found
    print(f"✓ {len(found)}個のチェックポイントが見つかりました")

# Re-scan for checkpoints whenever the refresh button is clicked.
refresh_checkpoints_btn.on_click(refresh_checkpoints)

# Generation settings - tuned to match HybridTransformer's train.py
num_samples_widget = widgets.IntSlider(
    value=9,  # match train.py's default
    min=1, max=20, step=1,
    description='生成サンプル数:',
    style={'description_width': 'initial'}
)

# Choice between random conditions ('ランダム選択') and manually specified ones ('条件指定').
condition_mode = widgets.RadioButtons(
    options=['ランダム選択', '条件指定'],
    value='ランダム選択',
    description='条件設定:',
    style={'description_width': 'initial'}
)

# Widgets for manual condition entry (5-dimensional condition vector)
condition_widgets = []
for i in range(5):
    widget = widgets.FloatSlider(
        value=0.0,
        min=-3.0, max=3.0, step=0.1,
        description=f'条件{i+1}:',
        style={'description_width': 'initial'}
    )
    condition_widgets.append(widget)

# Groups the five sliders so they can be shown or hidden together.
condition_container = widgets.VBox(condition_widgets)

def on_condition_mode_change(change):
    """Show the condition sliders only while manual condition entry is selected.

    Args:
        change: Change notification whose 'new' entry is the selected mode.
    """
    manual_entry = change['new'] == '条件指定'
    condition_container.layout.display = 'block' if manual_entry else 'none'

# Toggle the condition sliders whenever the mode selection changes.
condition_mode.observe(on_condition_mode_change, names='value')
condition_container.layout.display = 'none'  # hidden initially

# Generation and visualization
generate_button = widgets.Button(
    description='軌道生成・可視化',
    button_style='success',
    icon='chart-line'
)

# Output area that generate_and_visualize clears and prints into.
generation_output = widgets.Output()

def get_generate_command(model_name, checkpoint_path, conditions_path, num_samples):
    """Build the shell command that runs generation for ``model_name``.

    The command first changes into the model directory, so the checkpoint is
    passed as a path relative to that directory. Checkpoints stored outside
    the model directory (for example in a shared ``checkpoints/`` folder)
    therefore get a ``../``-style path instead of one that would no longer
    resolve after the ``cd``.

    Args:
        model_name: Key of ``evaluator.models``.
        checkpoint_path: Checkpoint file, absolute or relative to the current
            working directory.
        conditions_path: Accepted for interface compatibility; not used when
            building the command.
        num_samples: Value passed to the script as sample count / batch size.

    Returns:
        str: Command line intended to be executed through a shell.
    """
    model_dir = evaluator.models[model_name]

    try:
        # relpath resolves both arguments against the current working
        # directory before comparing them.
        relative_path = os.path.relpath(checkpoint_path, model_dir)
    except ValueError:
        # No relative path exists (e.g. different drives on Windows); an
        # absolute path remains valid after the "cd".
        relative_path = str(Path(checkpoint_path).absolute())

    if model_name == 'DiffWave':
        script_args = f"generate.py --checkpoint {relative_path} --batch_size {num_samples} --use_dummy"
    elif model_name in ['HybridModel', 'HybridTransformer']:
        # The Hybrid models generate through train.py's generate mode.
        script_args = f"train.py --mode generate --model_path {relative_path} --num_samples {num_samples}"
    elif model_name == 'Transformer':
        script_args = f"generate.py --checkpoint {relative_path} --num_samples {num_samples}"
    elif model_name == 'UNet':
        script_args = f"generate.py --checkpoint {relative_path} --batch_size {num_samples}"
    else:
        # Default for any other model
        script_args = f"generate.py --checkpoint {relative_path}"

    return f"cd {model_dir} && python {script_args}"

def generate_and_visualize(b):
    """Generate trajectories with the selected checkpoint and plot them next to real ones.

    Button-style callback; everything is printed into ``generation_output``.
    Steps: validate the current selection, pick the condition vectors (random
    dataset entries, or the manually entered vector repeated), run the model's
    generation command in a subprocess and, on success, call
    ``visualize_results``.

    NOTE(review): for the non-Hybrid models the conditions are saved to
    ``temp_conditions.npy``, but the path handed to ``get_generate_command``
    does not appear in the command it builds. Confirm how generate.py is
    supposed to receive them.

    Args:
        b: The clicked button; unused.
    """

    def run_generation(cmd):
        """Run ``cmd`` through the shell, print the outcome and return True on success."""
        print(f"実行コマンド: {cmd}")

        result = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
            cwd=os.getcwd()
        )

        if result.returncode == 0:
            print("✓ 軌道生成完了！")
            if result.stdout:
                print("生成ログ:", result.stdout[-500:])  # last 500 characters
            return True

        print(f"✗ 生成中にエラーが発生しました (終了コード: {result.returncode}):")
        print("stderr:", result.stderr)
        if result.stdout:
            print("stdout:", result.stdout)
        return False

    with generation_output:
        clear_output(wait=True)

        if evaluator.selected_model is None:
            print("モデルが選択されていません")
            return

        if not checkpoint_dropdown.value:
            print("チェックポイントが選択されていません")
            return

        if evaluator.dataset is None:
            print("データセットが読み込まれていません")
            return

        checkpoint_path = checkpoint_dropdown.value
        model_dir = evaluator.models[evaluator.selected_model]
        num_samples = num_samples_widget.value
        dataset = evaluator.dataset

        print(f"🎯 {evaluator.selected_model} で軌道生成を開始...")
        print(f"チェックポイント: {checkpoint_path}")

        # Debug information about how the checkpoint path relates to the model directory
        print(f"モデルディレクトリ: {model_dir}")
        if model_dir in str(checkpoint_path):
            relative_path = str(checkpoint_path).split(model_dir + "/", 1)[-1]
            print(f"変換後の相対パス: {relative_path}")
        else:
            print(f"パス変換が必要: {checkpoint_path}")

        temp_conditions_path = None
        try:
            # Prepare the condition vectors used for generation
            if condition_mode.value == 'ランダム選択':
                # Pick random conditions (and their trajectories) from the dataset
                if 'conditions' not in dataset:
                    print("データセットに条件情報がありません")
                    return

                conditions = dataset['conditions']
                # Sampling without replacement cannot return more entries
                # than the dataset holds.
                sample_count = min(num_samples, len(conditions))
                if sample_count < num_samples:
                    print(f"データセットの条件数は{len(conditions)}件のため、{sample_count}件のみ選択します")
                indices = np.random.choice(len(conditions), sample_count, replace=False)
                selected_conditions = conditions[indices]
                selected_trajectories = dataset['trajectories'][indices]
            else:
                # Use the manually entered condition vector for every sample
                condition_vector = np.array([w.value for w in condition_widgets])
                selected_conditions = np.tile(condition_vector, (num_samples, 1))

                # Find the real trajectories with the closest conditions (for comparison)
                if 'conditions' in dataset:
                    distances = np.linalg.norm(dataset['conditions'] - condition_vector, axis=1)
                    closest_indices = np.argsort(distances)[:num_samples]
                    selected_trajectories = dataset['trajectories'][closest_indices]
                else:
                    selected_trajectories = None

            if evaluator.selected_model in ['HybridModel', 'HybridTransformer']:
                # The Hybrid models generate through train.py, which handles
                # the conditions internally, so no conditions path is passed.
                cmd = get_generate_command(
                    evaluator.selected_model,
                    checkpoint_path,
                    None,
                    num_samples
                )

                if run_generation(cmd):
                    # Report which of the expected result files exist.
                    output_files = [
                        f"{model_dir}/outputs/generated_trajectories/generated_samples.npy",
                        f"{model_dir}/outputs/generated_trajectories/sampled_conditions.npy"
                    ]

                    print("生成ファイル:")
                    for file_path in output_files:
                        if Path(file_path).exists():
                            print(f"  ✓ {file_path}")
                        else:
                            print(f"  ✗ {file_path} (not found)")

                    # Plot inside the notebook
                    visualize_results(model_dir, selected_trajectories, selected_conditions)

            else:
                # Every other model generates through its generate.py
                generate_script = Path(model_dir) / 'generate.py'
                if not generate_script.exists():
                    print(f"✗ 生成スクリプトが見つかりません: {generate_script}")
                    return

                # Save the conditions to a temporary file
                temp_conditions_path = Path(model_dir) / 'temp_conditions.npy'
                np.save(temp_conditions_path, selected_conditions)

                cmd = get_generate_command(
                    evaluator.selected_model,
                    checkpoint_path,
                    str(temp_conditions_path),
                    num_samples
                )

                if run_generation(cmd):
                    # Load the generated result and plot it
                    visualize_results(model_dir, selected_trajectories, selected_conditions)

        except Exception as e:
            print(f"✗ 生成中にエラーが発生しました: {str(e)}")
            import traceback
            print("詳細なエラー情報:")
            print(traceback.format_exc())

        finally:
            # Remove the temporary file even when generation or plotting failed.
            if temp_conditions_path is not None and temp_conditions_path.exists():
                temp_conditions_path.unlink()

def _find_generated_trajectory_file(model_dir):
    """Return (newest generated-trajectory file or None, list of patterns searched)."""
    # Ordered by priority; the first pattern that matches anything wins.
    patterns = [
        # Path written by HybridTransformer's train.py
        f"{model_dir}/outputs/generated_trajectories/generated_samples.npy",
        # Patterns for the other models
        f"{model_dir}/generated_outputs/data/generated_trajectories.npy",
        f"{model_dir}/outputs/generated_trajectories/*.npy",
        f"{model_dir}/generated_outputs/**/*.npy",
        f"{model_dir}/generated_*.npy",
        f"{model_dir}/outputs/**/*.npy"
    ]

    for pattern in patterns:
        if '*' in pattern:
            candidates = list(Path('.').glob(pattern))
        else:
            direct_path = Path(pattern)
            candidates = [direct_path] if direct_path.exists() else []

        # The sampled condition vectors are stored in the same folders as the
        # trajectories; never pick that file up as trajectory data.
        candidates = [c for c in candidates if c.name != 'sampled_conditions.npy']

        if candidates:
            return max(candidates, key=lambda p: p.stat().st_mtime), patterns

    return None, patterns


def _normalize_trajectory_shape(trajectories):
    """Return ``trajectories`` as a 3-D array with 2 coordinates last, or None if unusable.

    Prints the reason when the data is rejected.
    """
    if trajectories.ndim == 2:
        if trajectories.shape[1] == 2:
            # (timesteps, coords) -> (1, timesteps, coords)
            trajectories = trajectories[np.newaxis, :, :]
            print(f"2次元データを1サンプル軌道として解釈: {trajectories.shape}")
        elif trajectories.shape[1] == 5:
            # Five columns matches the condition vectors, not trajectories.
            print("✗ 軌道データではなく条件データが読み込まれました")
            return None
        else:
            print(f"✗ 予期しないデータ形状: {trajectories.shape}")
            return None
    elif trajectories.ndim == 3:
        if trajectories.shape[2] != 2:
            print(f"✗ 座標次元が2ではありません: {trajectories.shape}")
            return None
    else:
        print(f"✗ サポートされていないデータ次元: {trajectories.shape}")
        return None

    # Heuristic: a first axis longer than the second is taken to mean
    # (timesteps, samples, coords) and is swapped to (samples, timesteps, coords).
    # NOTE(review): this misfires when there are more samples than timesteps.
    if trajectories.shape[0] > trajectories.shape[1]:
        trajectories = trajectories.transpose(1, 0, 2)
        print(f"軸を入れ替え: {trajectories.shape}")

    return trajectories


def visualize_results(model_dir, real_trajectories, conditions):
    """Plot the generated trajectories against the real ones inside the notebook.

    Locates the newest generated ``.npy`` file for the model, validates its
    shape, draws one subplot per condition (up to 9), then shows the
    statistics plot and prints summary and error statistics.

    Args:
        model_dir: Model directory, relative to the working directory.
        real_trajectories: Real trajectories to compare against, indexable by
            condition, or None when none are available.
        conditions: Condition vectors used as a fallback when the model did
            not write ``sampled_conditions.npy``.
    """
    generated_file, searched_patterns = _find_generated_trajectory_file(model_dir)

    if generated_file is None:
        print("生成された軌道ファイルが見つかりません")
        print("探索したパターン:")
        for number, pattern in enumerate(searched_patterns, start=1):
            print(f"  {number}. {pattern}")

        # List every .npy file under the model directory to help debugging.
        print(f"\n{model_dir}ディレクトリ内容:")
        try:
            for item in Path(model_dir).rglob('*.npy'):
                print(f"  - {item}")
        except Exception as e:
            print(f"ディレクトリ探索エラー: {e}")
        return

    try:
        generated_trajectories = np.load(generated_file)
        print(f"生成軌道読み込み: {generated_file}")
        print(f"生成軌道形状: {generated_trajectories.shape}")

        # Prefer the conditions written next to the trajectories (HybridTransformer).
        conditions_file = Path(f"{model_dir}/outputs/generated_trajectories/sampled_conditions.npy")
        if conditions_file.exists():
            sampled_conditions = np.load(conditions_file)
            print(f"条件データ読み込み: {conditions_file}")
            print(f"条件データ形状: {sampled_conditions.shape}")
        else:
            sampled_conditions = conditions

        generated_trajectories = _normalize_trajectory_shape(generated_trajectories)
        if generated_trajectories is None:
            return

    except Exception as e:
        print(f"✗ ファイル読み込みエラー: {e}")
        return

    total_generated = len(generated_trajectories)
    if total_generated == 0:
        # Nothing to plot, and the range statistics below need at least one sample.
        print("✗ 生成軌道が空です")
        return

    # Plot in the style of HybridTransformer's train.py, which generates
    # three samples per condition.
    num_conditions = num_samples_widget.value
    samples_per_condition = 3  # fixed to match train.py

    # Ceiling division, so a final group with fewer than three samples is
    # still shown instead of being dropped.
    available_conditions = -(-total_generated // samples_per_condition)

    # At most 9 conditions are displayed (3x3 grid; 2x3 when 6 or fewer).
    max_display_samples = min(9, num_conditions, available_conditions)
    fig_rows, fig_cols = (2, 3) if max_display_samples <= 6 else (3, 3)

    colors = ['red', 'blue', 'green', 'orange', 'purple']

    plt.figure(figsize=(5*fig_cols, 4*fig_rows))

    for i in range(max_display_samples):
        plt.subplot(fig_rows, fig_cols, i+1)

        # Real trajectory (when available)
        if real_trajectories is not None and i < len(real_trajectories):
            real_traj = real_trajectories[i]
            plt.plot(real_traj[:, 0], real_traj[:, 1],
                     'k-', linewidth=3, label='Original', alpha=0.8)

        # Generated trajectories belonging to this condition
        for j in range(samples_per_condition):
            gen_idx = i * samples_per_condition + j
            if gen_idx < total_generated:
                plt.plot(generated_trajectories[gen_idx, :, 0], generated_trajectories[gen_idx, :, 1],
                         '-', color=colors[j % len(colors)], linewidth=2, alpha=0.7,
                         label=f'Generated {j+1}')

        plt.xlabel('X Position')
        plt.ylabel('Y Position')
        plt.title(f'Condition {i+1}')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.axis('equal')

    if num_conditions > max_display_samples:
        plt.suptitle(f'Generation Results ({max_display_samples}/{num_conditions} samples displayed)', fontsize=16)
    else:
        plt.suptitle('Generation Results', fontsize=16)

    plt.tight_layout()
    plt.show()  # render the figure inside the notebook

    # Additional statistics plot
    create_statistics_plot(generated_trajectories, real_trajectories, sampled_conditions, samples_per_condition)

    # Summary statistics
    print(f"\n📊 Generation Statistics:")
    print(f"  Total generated samples: {total_generated}")
    print(f"  Conditions displayed: {max_display_samples}")
    print(f"  Samples per condition: {samples_per_condition}")
    print(f"  Trajectory length: {generated_trajectories.shape[1]}")
    print(f"  Coordinate dimensions: {generated_trajectories.shape[2]}")

    # Coordinate ranges
    print(f"\n📏 Trajectory Range:")
    print(f"  X: [{generated_trajectories[:,:,0].min():.3f}, {generated_trajectories[:,:,0].max():.3f}]")
    print(f"  Y: [{generated_trajectories[:,:,1].min():.3f}, {generated_trajectories[:,:,1].max():.3f}]")

    # Error statistics (only when enough real trajectories are available)
    if real_trajectories is not None and len(real_trajectories) >= max_display_samples:
        # Compare only the first generated sample of each displayed condition.
        first_indices = [i * samples_per_condition for i in range(max_display_samples)]
        selected_generated = [
            generated_trajectories[idx] for idx in first_indices if idx < total_generated
        ]

        if selected_generated:
            selected_generated = np.array(selected_generated)
            selected_real = real_trajectories[:len(selected_generated)]

            errors = calculate_trajectory_errors(selected_generated, selected_real)

            print(f"\n⚠️ Error Statistics (first sample per condition):")
            print(f"  MSE: {errors['mse']:.6f}")
            print(f"  Endpoint Error: {errors['endpoint_error']:.6f}")
            print(f"  Path Length Error: {errors['length_error']:.6f}")
            print(f"  Max Deviation: {errors['max_deviation']:.6f}")

def create_statistics_plot(generated_trajectories, real_trajectories, conditions, samples_per_condition):
    """Show a 2x3 grid of summary statistics for generated trajectories.

    Panels, in order: path-length histogram, max-displacement histogram,
    endpoint scatter, per-condition diversity bars, condition-parameter vs
    diversity scatter, and a text box with summary numbers.

    Args:
        generated_trajectories: Array indexed as [sample, step, coord]; the
            first two entries of the last axis are used as X and Y. Samples
            are assumed to be stored consecutively per condition (TODO confirm
            against the generator's output order).
        real_trajectories: Reference trajectories with the same indexing, or
            None to plot only the generated data.
        conditions: 2-D array with one row per condition, or None. Only used
            by the condition-vs-diversity panel.
        samples_per_condition: Number of consecutive generated samples that
            form one condition group.

    Returns:
        None. The figure is rendered with plt.show().
    """
    plt.figure(figsize=(15, 10))
    
    # Path length distribution (sum of Euclidean step lengths per trajectory)
    plt.subplot(2, 3, 1)
    generated_lengths = [np.sum(np.sqrt(np.sum(np.diff(traj, axis=0)**2, axis=1))) for traj in generated_trajectories]
    
    if real_trajectories is not None:
        original_lengths = [np.sum(np.sqrt(np.sum(np.diff(traj, axis=0)**2, axis=1))) for traj in real_trajectories]
        plt.hist(original_lengths, bins=20, alpha=0.7, label='Original', color='black')
    
    plt.hist(generated_lengths, bins=20, alpha=0.7, label='Generated', color='red')
    plt.xlabel('Path Length')
    plt.ylabel('Frequency')
    plt.title('Path Length Distribution')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    # Max displacement distribution (largest distance from the coordinate origin)
    plt.subplot(2, 3, 2)
    generated_max_disp = [np.max(np.sqrt(traj[:, 0]**2 + traj[:, 1]**2)) for traj in generated_trajectories]
    
    if real_trajectories is not None:
        original_max_disp = [np.max(np.sqrt(traj[:, 0]**2 + traj[:, 1]**2)) for traj in real_trajectories]
        plt.hist(original_max_disp, bins=20, alpha=0.7, label='Original', color='black')
    
    plt.hist(generated_max_disp, bins=20, alpha=0.7, label='Generated', color='red')
    plt.xlabel('Max Displacement')
    plt.ylabel('Frequency')
    plt.title('Max Displacement Distribution')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    # Endpoint position distribution (last step of every trajectory)
    plt.subplot(2, 3, 3)
    if real_trajectories is not None:
        plt.scatter(real_trajectories[:, -1, 0], real_trajectories[:, -1, 1], 
                   alpha=0.7, label='Original', color='black', s=50)
    
    plt.scatter(generated_trajectories[:, -1, 0], generated_trajectories[:, -1, 1], 
               alpha=0.5, label='Generated', color='red', s=30)
    plt.xlabel('Endpoint X')
    plt.ylabel('Endpoint Y')
    plt.title('Endpoint Position Distribution')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.axis('equal')
    
    # Generation diversity within each condition (HybridTransformer style)
    plt.subplot(2, 3, 4)
    # Integer division: a trailing incomplete group is not counted as a condition.
    num_conditions = len(generated_trajectories) // samples_per_condition
    diversities = []
    
    for i in range(num_conditions):
        start_idx = i * samples_per_condition
        end_idx = min(start_idx + samples_per_condition, len(generated_trajectories))
        gen_samples = generated_trajectories[start_idx:end_idx]
        
        # Mean point-wise distance over every unordered pair of samples in the group
        distances = []
        for j in range(len(gen_samples)):
            for k in range(j+1, len(gen_samples)):
                dist = np.mean(np.sqrt(np.sum((gen_samples[j] - gen_samples[k])**2, axis=1)))
                distances.append(dist)
        # A group with a single sample has no pairs and is recorded as 0.
        diversities.append(np.mean(distances) if distances else 0)
    
    plt.bar(range(num_conditions), diversities, alpha=0.7, color='green')
    plt.xlabel('Condition Index')
    plt.ylabel('Intra-Condition Diversity')
    plt.title('Diversity per Condition')
    plt.grid(True, alpha=0.3)
    
    # Relationship between condition parameters and diversity
    plt.subplot(2, 3, 5)
    if conditions is not None and len(conditions) >= num_conditions:
        # NOTE(review): assumed column order of the condition vector - confirm against the dataset.
        condition_names = ['Movement Time', 'Endpoint Error', 'Jerk', 'Goal X', 'Goal Y']
        for i, name in enumerate(condition_names):
            # Skip names for which the condition array has no column.
            if i < conditions.shape[1]:
                plt.scatter(conditions[:num_conditions, i], diversities, alpha=0.7, label=name)
        plt.xlabel('Condition Parameter Value')
        plt.ylabel('Intra-Condition Diversity')
        plt.title('Condition vs Diversity Relationship')
        plt.legend()
        plt.grid(True, alpha=0.3)
    else:
        plt.text(0.5, 0.5, 'Condition data not available', 
                ha='center', va='center', transform=plt.gca().transAxes)
        plt.title('Condition vs Diversity Relationship')
    
    # Summary text panel (sample counts and coordinate range)
    plt.subplot(2, 3, 6)
    info_text = f"""Generation Statistics:

Conditions: {num_conditions}
Samples per Condition: {samples_per_condition}
Total Generated Samples: {len(generated_trajectories)}
Sequence Length: {generated_trajectories.shape[1]}
Average Intra-Condition Diversity: {np.mean(diversities):.4f}

Coordinate Range:
X: [{generated_trajectories[:,:,0].min():.2f}, {generated_trajectories[:,:,0].max():.2f}]
Y: [{generated_trajectories[:,:,1].min():.2f}, {generated_trajectories[:,:,1].max():.2f}]"""
    
    plt.text(0.1, 0.9, info_text, fontsize=11, verticalalignment='top',
            transform=plt.gca().transAxes,
            bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.5))
    plt.axis('off')
    plt.title('Generation Statistics')
    
    plt.tight_layout()
    # The suptitle is added after tight_layout and placed slightly above the figure (y=1.02).
    plt.suptitle('Statistical Analysis of Generated Trajectories', fontsize=16, y=1.02)
    plt.show()  # display the figure inside the notebook

def calculate_trajectory_errors(generated, real):
    """Compute basic error metrics between generated and reference trajectories.

    Both inputs are indexed as [sample, step, coord] and must have matching
    shapes (they are subtracted element-wise).

    Returns:
        dict with keys 'mse', 'endpoint_error', 'length_error' and
        'max_deviation'.
    """
    def _path_lengths(trajectories):
        # Total Euclidean length of each trajectory: sum of per-step distances.
        step_vectors = np.diff(trajectories, axis=1)
        return np.sum(np.linalg.norm(step_vectors, axis=2), axis=1)

    residual = generated - real

    # Distance between the final points of each generated/real pair.
    final_point_gap = np.linalg.norm(residual[:, -1], axis=1)

    # Absolute difference in total path length per pair.
    length_gap = np.abs(_path_lengths(generated) - _path_lengths(real))

    # Point-wise Euclidean distance at every step of every trajectory.
    pointwise_distance = np.linalg.norm(residual, axis=2)

    return {
        'mse': np.mean(residual ** 2),
        'endpoint_error': np.mean(final_point_gap),
        'length_error': np.mean(length_gap),
        'max_deviation': np.max(pointwise_distance),
    }

# Run generation and visualization whenever the generate button is clicked.
generate_button.on_click(generate_and_visualize)

# Render the trajectory generation panel: heading, checkpoint picker with its
# refresh button, sample-count control, condition controls, the generate
# button, and the output area. The heading text is user-facing and stays as is.
display(widgets.VBox([
    widgets.HTML("<h3>軌道生成・可視化</h3>"),
    widgets.HBox([checkpoint_dropdown, refresh_checkpoints_btn]),
    num_samples_widget,
    condition_mode,
    condition_container,
    generate_button,
    generation_output
]))

VBox(children=(HTML(value='<h3>軌道生成・可視化</h3>'), HBox(children=(Dropdown(description='チェックポイント:', options=(), s…

## 4. 全体評価機能

In [10]:
# CLAUDE_ADDED
# Overall evaluation feature: widgets for the comprehensive evaluation panel.

# Number of samples to evaluate.
evaluation_samples_widget = widgets.IntSlider(
    value=9,  # matches HybridTransformer's train.py
    min=3, max=30, step=3,  # multiples of 3, because 3 samples are generated per condition
    description='評価サンプル数:',
    style={'description_width': 'initial'}
)

# Button that starts the comprehensive evaluation.
evaluate_button = widgets.Button(
    description='全体性能評価',
    button_style='warning',
    icon='chart-bar'
)

# Output area that receives the evaluation log and figures.
evaluation_output = widgets.Output()

def get_evaluation_command(model_name, checkpoint_path, num_samples):
    """Build the shell command that runs batch generation for a model.

    The command changes into the model's directory and invokes that model's
    generation script (train.py in generate mode for the Hybrid models,
    generate.py otherwise).

    Every interpolated value is passed through shlex.quote because the caller
    runs the string with shell=True; a directory or checkpoint path containing
    spaces or shell metacharacters would otherwise split into several
    arguments or be interpreted by the shell. Plain paths are left unchanged
    by the quoting.

    Args:
        model_name: Key into evaluator.models selecting the model.
        checkpoint_path: Path to the checkpoint, as a str or Path.
        num_samples: Number of samples to request from the generator.

    Returns:
        The command line as a single string.
    """
    import shlex  # local import: only this function needs it

    model_dir = evaluator.models[model_name]
    checkpoint_str = str(checkpoint_path)

    # Express the checkpoint relative to the model directory, since the
    # command runs after `cd` into that directory.
    if model_dir in checkpoint_str:
        relative_path = checkpoint_str.split(model_dir + "/", 1)[-1]
    else:
        try:
            relative_path = Path(checkpoint_path).absolute().relative_to(Path(model_dir).absolute())
        except ValueError:
            # Checkpoint lives outside the model directory: use it as given.
            relative_path = checkpoint_path

    quoted_dir = shlex.quote(str(model_dir))
    quoted_checkpoint = shlex.quote(str(relative_path))
    quoted_count = shlex.quote(str(num_samples))

    if model_name == 'DiffWave':
        cmd = f"cd {quoted_dir} && python generate.py --checkpoint {quoted_checkpoint} --batch_size {quoted_count} --use_dummy"
    elif model_name in ['HybridModel', 'HybridTransformer']:
        # The Hybrid models generate through train.py's generate mode.
        cmd = f"cd {quoted_dir} && python train.py --mode generate --model_path {quoted_checkpoint} --num_samples {quoted_count}"
    elif model_name == 'Transformer':
        cmd = f"cd {quoted_dir} && python generate.py --checkpoint {quoted_checkpoint} --num_samples {quoted_count}"
    elif model_name == 'UNet':
        cmd = f"cd {quoted_dir} && python generate.py --checkpoint {quoted_checkpoint} --batch_size {quoted_count}"
    else:
        cmd = f"cd {quoted_dir} && python generate.py --checkpoint {quoted_checkpoint}"

    return cmd

def comprehensive_evaluation(b):
    """Button handler: run batch generation for the selected model and evaluate it.

    Picks random dataset samples, runs the model's generation command in a
    subprocess, loads the generated .npy file and hands it to the matching
    detailed-evaluation routine. All output is written to evaluation_output.

    Args:
        b: The clicked button (passed by ipywidgets; not used).
    """
    with evaluation_output:
        clear_output(wait=True)
        
        if evaluator.selected_model is None or evaluator.dataset is None:
            print("モデルまたはデータセットが選択されていません")
            return
        
        if not checkpoint_dropdown.value:
            print("チェックポイントが選択されていません")
            return
        
        print(f"📊 {evaluator.selected_model} の全体性能評価を開始...")
        
        try:
            # Pick random samples (without replacement) for the evaluation
            n_samples = evaluation_samples_widget.value
            total_samples = len(evaluator.dataset['trajectories'])
            indices = np.random.choice(total_samples, min(n_samples, total_samples), replace=False)
            
            test_conditions = evaluator.dataset['conditions'][indices]
            test_trajectories = evaluator.dataset['trajectories'][indices]
            
            print(f"評価サンプル数: {len(test_conditions)}")
            print(f"チェックポイント: {checkpoint_dropdown.value}")
            
            # Run batch generation
            model_dir = evaluator.models[evaluator.selected_model]
            
            # The Hybrid models generate through train.py's generate_samples
            if evaluator.selected_model in ['HybridModel', 'HybridTransformer']:
                # Build the evaluation command
                cmd = get_evaluation_command(
                    evaluator.selected_model, 
                    checkpoint_dropdown.value, 
                    len(test_conditions)
                )
                
                print(f"実行コマンド: {cmd}")
                
                result = subprocess.run(
                    cmd,
                    shell=True,
                    capture_output=True,
                    text=True,
                    cwd=os.getcwd()
                )
                
                if result.returncode == 0:
                    print("✓ バッチ生成完了")
                    
                    # Load the result written by the Hybrid model's train.py
                    generated_files_patterns = [
                        f"{model_dir}/outputs/generated_trajectories/generated_samples.npy"
                    ]
                    
                    generated_files = []
                    for pattern in generated_files_patterns:
                        file_path = Path(pattern)
                        if file_path.exists():
                            generated_files.append(file_path)
                    
                    if generated_files:
                        # Use the most recently modified candidate
                        latest_file = max(generated_files, key=lambda x: x.stat().st_mtime)
                        generated_trajectories = np.load(latest_file)
                        
                        print(f"生成軌道読み込み: {latest_file}")
                        print(f"生成軌道形状: {generated_trajectories.shape}")
                        
                        # Shape adjustment: swap the first two axes when axis 0 is longer
                        # than axis 1 and the last axis has size 2.
                        # NOTE(review): presumably converts (step, sample, 2) to (sample, step, 2) - confirm.
                        if len(generated_trajectories.shape) == 3:
                            if generated_trajectories.shape[0] > generated_trajectories.shape[1] and generated_trajectories.shape[2] == 2:
                                generated_trajectories = generated_trajectories.transpose(1, 0, 2)
                                print(f"軸を入れ替え: {generated_trajectories.shape}")
                        
                        # Run the HybridTransformer-style detailed evaluation (figures shown in the notebook)
                        detailed_evaluation_hybrid_style(generated_trajectories, test_trajectories, test_conditions)
                    else:
                        print("生成された軌道ファイルが見つかりません")
                        print("探索したパターン:")
                        for i, pattern in enumerate(generated_files_patterns):
                            print(f"  {i+1}. {pattern}")
                else:
                    print(f"✗ バッチ生成に失敗しました (終了コード: {result.returncode}):")
                    print("stderr:", result.stderr)
                    if result.stdout:
                        print("stdout:", result.stdout)
            
            else:
                # Other models use the original flow
                # NOTE(review): the command built by get_evaluation_command does not reference
                # this file; confirm generate.py actually reads eval_conditions.npy.
                temp_conditions_path = Path(model_dir) / 'eval_conditions.npy'
                np.save(temp_conditions_path, test_conditions)
                
                # Build the evaluation command
                cmd = get_evaluation_command(
                    evaluator.selected_model, 
                    checkpoint_dropdown.value, 
                    len(test_conditions)
                )
                
                print(f"実行コマンド: {cmd}")
                
                result = subprocess.run(
                    cmd,
                    shell=True,
                    capture_output=True,
                    text=True,
                    cwd=os.getcwd()
                )
                
                if result.returncode == 0:
                    print("✓ バッチ生成完了")
                    
                    # Locate the generated result - patterns are tried in priority order
                    potential_patterns = [
                        f"{model_dir}/generated_outputs/data/generated_trajectories.npy",
                        f"{model_dir}/outputs/generated_trajectories/*.npy",
                        f"{model_dir}/generated_outputs/**/*.npy",
                        f"{model_dir}/generated_*.npy",
                        f"{model_dir}/outputs/**/*.npy"
                    ]
                    
                    generated_files = []
                    for pattern in potential_patterns:
                        if '*' in pattern:
                            files = list(Path('.').glob(pattern))
                        else:
                            file_path = Path(pattern)
                            if file_path.exists():
                                files = [file_path]
                            else:
                                files = []
                        
                        generated_files.extend(files)
                        # Stop at the first pattern that matches anything
                        if files:
                            break
                    
                    if generated_files:
                        # Use the most recently modified match
                        latest_file = max(generated_files, key=lambda x: x.stat().st_mtime)
                        generated_trajectories = np.load(latest_file)
                        
                        print(f"生成軌道読み込み: {latest_file}")
                        print(f"生成軌道形状: {generated_trajectories.shape}")
                        
                        # Shape adjustment: same axis swap as in the Hybrid branch above
                        if len(generated_trajectories.shape) == 3:
                            if generated_trajectories.shape[0] > generated_trajectories.shape[1] and generated_trajectories.shape[2] == 2:
                                generated_trajectories = generated_trajectories.transpose(1, 0, 2)
                                print(f"軸を入れ替え: {generated_trajectories.shape}")
                        
                        # Run the detailed evaluation (figures shown in the notebook)
                        detailed_evaluation(generated_trajectories, test_trajectories, test_conditions)
                    else:
                        print("生成された軌道ファイルが見つかりません")
                        print("探索したパターン:")
                        for i, pattern in enumerate(potential_patterns):
                            print(f"  {i+1}. {pattern}")
                else:
                    print(f"✗ バッチ生成に失敗しました (終了コード: {result.returncode}):")
                    print("stderr:", result.stderr)
                    if result.stdout:
                        print("stdout:", result.stdout)
                
                # Remove the temporary conditions file
                # NOTE(review): not reached if anything above raises; consider try/finally.
                if temp_conditions_path.exists():
                    temp_conditions_path.unlink()
                
        except Exception as e:
            # Top-level boundary of the button handler: report the error instead of propagating it
            print(f"✗ 評価中にエラーが発生しました: {str(e)}")
            import traceback
            print("詳細なエラー情報:")
            print(traceback.format_exc())

def detailed_evaluation_hybrid_style(generated, real, conditions, samples_per_condition=3):
    """Print and plot a HybridTransformer-style evaluation report.

    Generated trajectories are treated as consecutive groups of
    `samples_per_condition` samples, one group per condition. Accuracy metrics
    use the first sample of each group against the matching real trajectory;
    diversity is the mean pairwise distance inside each group.

    Args:
        generated: Array indexed as [sample, step, coord].
        real: Reference trajectories, one per condition, same step/coord axes.
        conditions: Condition vectors; forwarded to the plotting routine.
        samples_per_condition: Group size. Defaults to 3, the value that was
            previously hard-coded here.

    Returns:
        The metrics dict produced by calculate_comprehensive_errors.

    Raises:
        ValueError: If samples_per_condition is below 1, or if there is not at
            least one real trajectory and one complete group of generated
            samples (previously this surfaced as an opaque broadcasting
            ValueError further down).
    """
    if samples_per_condition < 1:
        raise ValueError("samples_per_condition must be a positive integer")

    print("\n📈 Detailed Evaluation Results (HybridTransformer Style):")
    print("=" * 60)

    total_generated = len(generated)
    # Only complete groups that also have a real counterpart are evaluated.
    num_conditions = min(len(real), total_generated // samples_per_condition)

    print(f"Evaluation samples: {num_conditions}")
    print(f"Total generated trajectories: {total_generated}")
    print(f"Samples per condition: {samples_per_condition}")
    print(f"Trajectory length: {generated.shape[1]}")
    print(f"Coordinate dimensions: {generated.shape[2]}")

    if num_conditions == 0:
        raise ValueError(
            "Nothing to evaluate: need at least one real trajectory and one "
            f"complete group of {samples_per_condition} generated samples"
        )

    # Baseline accuracy uses the first sample of each condition group.
    selected_generated = np.array(
        [generated[i * samples_per_condition] for i in range(num_conditions)]
    )
    selected_real = real[:len(selected_generated)]

    errors = calculate_comprehensive_errors(selected_generated, selected_real)

    print("\n🎯 Trajectory Accuracy:")
    print(f"  Mean Squared Error (MSE): {errors['mse']:.6f}")
    print(f"  Mean Absolute Error (MAE): {errors['mae']:.6f}")
    print(f"  Endpoint Error: {errors['endpoint_error']:.6f} ± {errors['endpoint_std']:.6f}")
    print(f"  Max Deviation: {errors['max_deviation']:.6f}")

    print("\n📏 Trajectory Characteristics:")
    print(f"  Path Length Error: {errors['length_error']:.6f} ± {errors['length_std']:.6f}")
    print(f"  Smoothness (Jerk): {errors['jerk_error']:.6f}")
    print(f"  Curvature Difference: {errors['curvature_error']:.6f}")

    # Intra-condition diversity: mean point-wise distance over all sample pairs
    # of a group; a group with a single sample has no pairs and counts as 0.
    condition_diversities = []
    for i in range(num_conditions):
        start_idx = i * samples_per_condition
        group = generated[start_idx:start_idx + samples_per_condition]
        pair_distances = [
            np.mean(np.linalg.norm(group[j] - group[k], axis=1))
            for j in range(len(group))
            for k in range(j + 1, len(group))
        ]
        condition_diversities.append(
            sum(pair_distances) / len(pair_distances) if pair_distances else 0
        )

    avg_diversity = np.mean(condition_diversities) if condition_diversities else 0

    print("\n🎲 Diversity & Consistency (HybridTransformer Style):")
    print(f"  Average intra-condition diversity: {avg_diversity:.6f}")
    print(f"  Condition consistency: {errors['condition_consistency']:.6f}")
    # Note: the value printed here is the standard deviation across conditions.
    print(f"  Diversity per condition: {np.std(condition_diversities):.6f}")

    # Figures are displayed inside the notebook.
    create_evaluation_plots_hybrid_style(generated, real, conditions, errors, samples_per_condition)

    overall_score = calculate_overall_score_hybrid_style(errors, avg_diversity)
    print(f"\n⭐ Overall Score (HybridTransformer Style): {overall_score:.2f}/100")

    return errors

def detailed_evaluation(generated, real, conditions):
    """Print and plot the classic-style evaluation report.

    Computes the full metric set for `generated` against `real`, prints it in
    three sections, shows the evaluation figure in the notebook and prints the
    overall score.

    Returns:
        The metrics dict produced by calculate_comprehensive_errors.
    """
    print("\n📈 詳細評価結果:")
    print("=" * 50)

    # Basic statistics
    print(f"評価サンプル数: {len(generated)}")
    print(f"軌道長: {generated.shape[1]}")
    print(f"次元数: {generated.shape[2]}")

    metrics = calculate_comprehensive_errors(generated, real)

    # Report layout: (section heading, lines printed under that heading).
    report_sections = (
        ("\n🎯 軌道精度:", (
            f"  平均二乗誤差 (MSE): {metrics['mse']:.6f}",
            f"  平均絶対誤差 (MAE): {metrics['mae']:.6f}",
            f"  終点誤差: {metrics['endpoint_error']:.6f} ± {metrics['endpoint_std']:.6f}",
            f"  最大偏差: {metrics['max_deviation']:.6f}",
        )),
        ("\n📏 軌道特性:", (
            f"  軌道長誤差: {metrics['length_error']:.6f} ± {metrics['length_std']:.6f}",
            f"  滑らかさ (ジャーク): {metrics['jerk_error']:.6f}",
            f"  曲率差: {metrics['curvature_error']:.6f}",
        )),
        ("\n🎲 多様性・一貫性:", (
            f"  軌道多様性: {metrics['diversity']:.6f}",
            f"  条件一貫性: {metrics['condition_consistency']:.6f}",
        )),
    )
    for heading, body_lines in report_sections:
        print(heading)
        for line in body_lines:
            print(line)

    # Figures are displayed inside the notebook.
    create_evaluation_plots(generated, real, conditions, metrics)

    # Evaluation summary
    print(f"\n⭐ 総合スコア: {calculate_overall_score(metrics):.2f}/100")

    return metrics

def calculate_comprehensive_errors(generated, real):
    """Compute the full set of error metrics between generated and real trajectories.

    Both inputs are indexed as [sample, step, coord] with matching shapes; the
    first two coordinates are used as X and Y for the curvature term.

    Returns:
        dict with keys 'mse', 'mae', 'endpoint_error', 'endpoint_std',
        'max_deviation', 'length_error', 'length_std', 'jerk_error',
        'curvature_error', 'diversity' and 'condition_consistency'.
    """
    def _path_lengths(trajs):
        # Sum of Euclidean step lengths along each trajectory.
        return np.sum(np.linalg.norm(np.diff(trajs, axis=1), axis=2), axis=1)

    def _squared_jerk(trajs):
        # Sum of squared norms of the third finite difference (smoothness proxy).
        third_diff = np.diff(trajs, n=3, axis=1)
        return np.sum(np.linalg.norm(third_diff, axis=2) ** 2, axis=1)

    def _mean_curvature(trajs):
        # Discrete planar curvature |x'y'' - y'x''| / (x'^2 + y'^2)^(3/2),
        # with a small epsilon guarding against zero velocity.
        vel_x = np.diff(trajs[:, :, 0], axis=1)
        vel_y = np.diff(trajs[:, :, 1], axis=1)
        acc_x = np.diff(vel_x, axis=1)
        acc_y = np.diff(vel_y, axis=1)
        vel_x, vel_y = vel_x[:, :-1], vel_y[:, :-1]
        cross = np.abs(vel_x * acc_y - vel_y * acc_x)
        return np.mean(cross / (vel_x**2 + vel_y**2 + 1e-8)**(3/2), axis=1)

    residual = generated - real
    mean_squared = np.mean(residual ** 2)

    final_point_gap = np.linalg.norm(generated[:, -1] - real[:, -1], axis=1)
    length_gap = np.abs(_path_lengths(generated) - _path_lengths(real))

    # Diversity: mean point-wise distance over every unordered pair of
    # generated trajectories; 0 when there are fewer than two.
    sample_count = len(generated)
    pair_distances = [
        np.mean(np.linalg.norm(generated[a] - generated[b], axis=1))
        for a in range(sample_count)
        for b in range(a + 1, sample_count)
    ]
    diversity = sum(pair_distances) / len(pair_distances) if pair_distances else 0

    return {
        'mse': mean_squared,
        'mae': np.mean(np.abs(residual)),
        'endpoint_error': np.mean(final_point_gap),
        'endpoint_std': np.std(final_point_gap),
        'max_deviation': np.max(np.linalg.norm(residual, axis=2)),
        'length_error': np.mean(length_gap),
        'length_std': np.std(length_gap),
        'jerk_error': np.mean(np.abs(_squared_jerk(generated) - _squared_jerk(real))),
        'curvature_error': np.mean(np.abs(_mean_curvature(generated) - _mean_curvature(real))),
        'diversity': diversity,
        # Derived purely from the MSE: 1 at zero error, approaching 0 as it grows.
        'condition_consistency': 1.0 - (mean_squared / (mean_squared + 1.0)),
    }

def calculate_overall_score(errors):
    """Combine the error metrics into a single score out of 100.

    Each metric is mapped to a 0-100 sub-score and the sub-scores are combined
    with weights 0.3 (MSE), 0.3 (endpoint error), 0.2 (condition consistency)
    and 0.2 (diversity).

    Args:
        errors: dict providing 'mse', 'endpoint_error',
            'condition_consistency' and 'diversity'.

    Returns:
        The score clamped to [0, 100]. Returns 0.0 when any input metric is
        NaN or infinite: Python's max/min let NaN slip through
        (min(100, nan) == 100), which previously gave a diverged model a
        perfect 100.
    """
    inputs = (
        errors['mse'],
        errors['endpoint_error'],
        errors['condition_consistency'],
        errors['diversity'],
    )
    if not np.all(np.isfinite(inputs)):
        return 0.0

    # Normalize each metric to a 0-100 scale.
    mse_score = max(0, 100 - errors['mse'] * 10000)  # smaller MSE is better
    endpoint_score = max(0, 100 - errors['endpoint_error'] * 100)
    consistency_score = errors['condition_consistency'] * 100
    diversity_score = min(100, errors['diversity'] * 50)  # rewards moderate diversity, capped

    # Weighted average: MSE, endpoint, consistency, diversity.
    weights = [0.3, 0.3, 0.2, 0.2]
    scores = [mse_score, endpoint_score, consistency_score, diversity_score]

    overall = sum(w * s for w, s in zip(weights, scores))
    return max(0, min(100, overall))

def calculate_overall_score_hybrid_style(errors, avg_diversity):
    """Combine the metrics into a HybridTransformer-style score out of 100.

    Sub-scores are weighted 0.4 (MSE), 0.3 (endpoint error), 0.2 (condition
    consistency) and 0.1 (intra-condition diversity).

    Args:
        errors: dict providing 'mse', 'endpoint_error' and
            'condition_consistency'.
        avg_diversity: Mean intra-condition diversity.

    Returns:
        The score clamped to [0, 100]. Returns 0.0 when any input is NaN or
        infinite: Python's max/min let NaN slip through
        (min(100, nan) == 100), which previously gave a diverged model a
        perfect 100.
    """
    inputs = (
        errors['mse'],
        errors['endpoint_error'],
        errors['condition_consistency'],
        avg_diversity,
    )
    if not np.all(np.isfinite(inputs)):
        return 0.0

    # Normalize each metric to a 0-100 scale.
    mse_score = max(0, 100 - errors['mse'] * 10000)
    endpoint_score = max(0, 100 - errors['endpoint_error'] * 100)
    consistency_score = errors['condition_consistency'] * 100
    diversity_score = min(100, avg_diversity * 30)  # intra-condition diversity, capped

    # Weighted average: MSE weighted most, diversity least.
    weights = [0.4, 0.3, 0.2, 0.1]
    scores = [mse_score, endpoint_score, consistency_score, diversity_score]

    overall = sum(w * s for w, s in zip(weights, scores))
    return max(0, min(100, overall))

def create_evaluation_plots_hybrid_style(generated, real, conditions, errors, samples_per_condition):
    """Show the 2x3 HybridTransformer-style evaluation figure in the notebook.

    Panels: trajectory comparison, intra-condition diversity histogram,
    endpoint error of the first sample per condition, path-length comparison,
    diversity per condition, and a text summary with the overall score.

    The number of conditions is the number of complete groups of
    `samples_per_condition` generated samples that also have a real
    trajectory. Previously it was len(real), so conditions without any
    generated samples were counted with diversity 0, which lowered the plotted
    mean diversity and overall score relative to the values printed by
    detailed_evaluation_hybrid_style.

    Args:
        generated: Array indexed as [sample, step, coord], grouped
            consecutively by condition.
        real: Reference trajectories, one per condition.
        conditions: Condition vectors (accepted for interface symmetry; not
            used by this function).
        errors: Metrics dict providing 'mse', 'endpoint_error',
            'length_error', 'jerk_error' and 'condition_consistency'.
        samples_per_condition: Number of generated samples per condition.

    Raises:
        ValueError: If samples_per_condition is below 1, or if there is not at
            least one real trajectory and one complete group of generated
            samples.
    """
    if samples_per_condition < 1:
        raise ValueError("samples_per_condition must be a positive integer")

    num_conditions = min(len(real), len(generated) // samples_per_condition)
    if num_conditions == 0:
        raise ValueError(
            "Nothing to plot: need at least one real trajectory and one "
            f"complete group of {samples_per_condition} generated samples"
        )

    # First sample of each condition group and the matching real trajectories.
    first_samples = generated[:num_conditions * samples_per_condition:samples_per_condition]
    real_subset = real[:num_conditions]

    # Intra-condition diversity: mean point-wise distance over all sample
    # pairs of a group; a single-sample group has no pairs and counts as 0.
    condition_diversities = []
    for i in range(num_conditions):
        start_idx = i * samples_per_condition
        group = generated[start_idx:start_idx + samples_per_condition]
        pair_distances = [
            np.mean(np.linalg.norm(group[j] - group[k], axis=1))
            for j in range(len(group))
            for k in range(j + 1, len(group))
        ]
        condition_diversities.append(
            sum(pair_distances) / len(pair_distances) if pair_distances else 0
        )
    avg_diversity = np.mean(condition_diversities)

    _, axes = plt.subplots(2, 3, figsize=(18, 12))

    # 1. Trajectory comparison: real trajectory plus every generated sample
    #    for the first few conditions.
    ax = axes[0, 0]
    colors = ['red', 'blue', 'green']  # cycled when a group has more than three samples
    for i in range(min(3, num_conditions)):
        ax.plot(real[i, :, 0], real[i, :, 1], 'k-', linewidth=3, alpha=0.8, label='Real' if i == 0 else "")
        for j in range(samples_per_condition):
            gen_idx = i * samples_per_condition + j
            ax.plot(generated[gen_idx, :, 0], generated[gen_idx, :, 1],
                    '--', color=colors[j % len(colors)], linewidth=2, alpha=0.7,
                    label=f'Generated {j+1}' if i == 0 else "")

    ax.set_title('Trajectory Comparison (HybridTransformer Style)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_xlabel('X Position')
    ax.set_ylabel('Y Position')

    # 2. Distribution of intra-condition diversity.
    ax = axes[0, 1]
    ax.hist(condition_diversities, bins=15, alpha=0.7, color='purple', edgecolor='black')
    ax.axvline(avg_diversity, color='red', linestyle='--', linewidth=2,
               label=f'Mean: {avg_diversity:.4f}')
    ax.set_title('Intra-Condition Diversity Distribution')
    ax.set_xlabel('Diversity')
    ax.set_ylabel('Frequency')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 3. Endpoint error of the first sample of each condition.
    ax = axes[0, 2]
    endpoint_errors = np.linalg.norm(first_samples[:, -1] - real_subset[:, -1], axis=1)
    ax.scatter(range(len(endpoint_errors)), endpoint_errors, alpha=0.6, c='orange')
    ax.axhline(np.mean(endpoint_errors), color='red', linestyle='--', linewidth=2,
               label=f'Mean: {np.mean(endpoint_errors):.4f}')
    ax.set_title('Endpoint Errors (First Sample per Condition)')
    ax.set_xlabel('Condition Index')
    ax.set_ylabel('Endpoint Error')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 4. Path length: generated vs real, with the identity line as reference.
    ax = axes[1, 0]
    selected_gen_lengths = np.sum(np.linalg.norm(np.diff(first_samples, axis=1), axis=2), axis=1)
    real_lengths = np.sum(np.linalg.norm(np.diff(real_subset, axis=1), axis=2), axis=1)
    ax.scatter(real_lengths, selected_gen_lengths, alpha=0.6, c='green')
    min_len = min(np.min(real_lengths), np.min(selected_gen_lengths))
    max_len = max(np.max(real_lengths), np.max(selected_gen_lengths))
    ax.plot([min_len, max_len], [min_len, max_len], 'r--', linewidth=2, label='Ideal Line')
    ax.set_title('Path Length Comparison')
    ax.set_xlabel('Real Path Length')
    ax.set_ylabel('Generated Path Length')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 5. Diversity of each condition.
    ax = axes[1, 1]
    ax.bar(range(len(condition_diversities)), condition_diversities, alpha=0.7, color='cyan')
    ax.set_title('Diversity per Condition')
    ax.set_xlabel('Condition Index')
    ax.set_ylabel('Intra-Condition Diversity')
    ax.grid(True, alpha=0.3)

    # 6. Text summary with the overall score.
    ax = axes[1, 2]
    ax.axis('off')

    overall_score = calculate_overall_score_hybrid_style(errors, avg_diversity)

    score_text = f"""HybridTransformer Style Evaluation
    
MSE: {errors['mse']:.6f}
Endpoint Error: {errors['endpoint_error']:.4f}
Path Length Error: {errors['length_error']:.4f}
Jerk Error: {errors['jerk_error']:.4f}
Intra-Condition Diversity: {avg_diversity:.4f}
Consistency: {errors['condition_consistency']:.4f}

Conditions: {num_conditions}
Samples per Condition: {samples_per_condition}
Total Samples: {len(generated)}

Overall Score: {overall_score:.1f}/100"""

    ax.text(0.1, 0.9, score_text, transform=ax.transAxes,
            fontsize=11, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.8))

    plt.tight_layout()
    plt.show()  # display the figure inside the notebook

def create_evaluation_plots(generated, real, conditions, errors):
    """Draw the classic-style six-panel evaluation figure inside the notebook.

    The figure is a 2 x 3 grid:

    1. overlay of up to five generated/real trajectory pairs,
    2. histogram of point-wise Euclidean errors,
    3. per-sample endpoint error scatter,
    4. real vs. generated path length scatter with the identity line,
    5. mean point-wise error at each time step,
    6. text box with the scalar scores taken from ``errors``.

    Args:
        generated: Generated trajectories, indexed as
            ``[sample, time step, coordinate]``; coordinates 0 and 1 are
            plotted as X and Y.
        real: Reference trajectories, indexed the same way. ``generated - real``
            must be a valid element-wise subtraction.
        conditions: Not used by this function; kept so existing callers that
            pass it keep working.
        errors: Mapping providing the numeric entries ``'mse'``,
            ``'endpoint_error'``, ``'length_error'``, ``'jerk_error'``,
            ``'diversity'`` and ``'condition_consistency'``. It is also passed
            to ``calculate_overall_score``.

    Raises:
        ValueError: If ``generated`` contains no trajectories.
    """
    # Fail fast on an empty batch. Without this guard the failure only shows
    # up at the min/max reduction of the path-length panel, after a figure has
    # already been created and partially drawn.
    if len(generated) == 0:
        raise ValueError(
            "create_evaluation_plots requires at least one generated trajectory"
        )

    def path_lengths(trajectories):
        """Return the summed step-to-step distance of every trajectory."""
        steps = np.diff(trajectories, axis=1)
        return np.sum(np.linalg.norm(steps, axis=2), axis=1)

    _, axes = plt.subplots(2, 3, figsize=(18, 12))

    # 1. Sample trajectory overlay (only the first pair gets legend labels).
    ax = axes[0, 0]
    n_show = min(5, len(generated))
    for i in range(n_show):
        ax.plot(generated[i, :, 0], generated[i, :, 1], 'r-', alpha=0.7,
                linewidth=1.5, label='Generated' if i == 0 else "")
        ax.plot(real[i, :, 0], real[i, :, 1], 'b--', alpha=0.7,
                linewidth=1.5, label='Real' if i == 0 else "")
    ax.set_title('Trajectory Comparison Samples')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')

    # Euclidean distance between matching points, one row per sample and one
    # column per time step. Computed once and shared by panels 2, 3 and 5.
    distances = np.linalg.norm(generated - real, axis=2)

    # 2. Point-wise error distribution.
    ax = axes[0, 1]
    point_errors = distances.flatten()
    mean_point_error = np.mean(point_errors)
    ax.hist(point_errors, bins=50, alpha=0.7, color='orange', edgecolor='black')
    ax.axvline(mean_point_error, color='red', linestyle='--', linewidth=2,
               label=f'Mean: {mean_point_error:.4f}')
    ax.set_title('Point-wise Error Distribution')
    ax.set_xlabel('Error')
    ax.set_ylabel('Frequency')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 3. Endpoint error: the distance at the last time step of each sample.
    ax = axes[0, 2]
    endpoint_errors = distances[:, -1]
    mean_endpoint_error = np.mean(endpoint_errors)
    ax.scatter(range(len(endpoint_errors)), endpoint_errors, alpha=0.6, c='purple')
    ax.axhline(mean_endpoint_error, color='red', linestyle='--', linewidth=2,
               label=f'Mean: {mean_endpoint_error:.4f}')
    ax.set_title('Endpoint Errors')
    ax.set_xlabel('Sample Index')
    ax.set_ylabel('Endpoint Error')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 4. Path length comparison; points on the dashed identity line have
    #    equal real and generated length.
    ax = axes[1, 0]
    gen_lengths = path_lengths(generated)
    real_lengths = path_lengths(real)
    ax.scatter(real_lengths, gen_lengths, alpha=0.6, c='green')
    min_len = min(np.min(real_lengths), np.min(gen_lengths))
    max_len = max(np.max(real_lengths), np.max(gen_lengths))
    ax.plot([min_len, max_len], [min_len, max_len], 'r--', linewidth=2,
            label='Ideal Line')
    ax.set_title('Path Length Comparison')
    ax.set_xlabel('Real Path Length')
    ax.set_ylabel('Generated Path Length')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 5. Error over time: mean distance across samples at each time step.
    ax = axes[1, 1]
    time_errors = np.mean(distances, axis=0)
    ax.plot(time_errors, 'b-', linewidth=2, marker='o', markersize=4)
    ax.set_title('Temporal Error Evolution')
    ax.set_xlabel('Time Step')
    ax.set_ylabel('Average Error')
    ax.grid(True, alpha=0.3)

    # 6. Score summary text box.
    ax = axes[1, 2]
    ax.axis('off')

    # The second entry is a whitespace-only line that is part of the displayed
    # text; it is spelled out here so whitespace-stripping tools cannot alter it.
    score_lines = [
        "Evaluation Scores",
        "    ",
        f"MSE: {errors['mse']:.6f}",
        f"Endpoint Error: {errors['endpoint_error']:.4f}",
        f"Path Length Error: {errors['length_error']:.4f}",
        f"Jerk Error: {errors['jerk_error']:.4f}",
        f"Diversity: {errors['diversity']:.4f}",
        f"Consistency: {errors['condition_consistency']:.4f}",
        "",
        f"Overall Score: {calculate_overall_score(errors):.1f}/100",
    ]
    score_text = "\n".join(score_lines)

    ax.text(0.1, 0.9, score_text, transform=ax.transAxes,
            fontsize=12, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='lightcyan', alpha=0.8))

    plt.tight_layout()
    plt.show()  # render the figure inline in the notebook

# Register the evaluation callback on its button, then render the
# overall-evaluation panel: heading, sample-count widget, button and the
# evaluation output widget, stacked vertically.
evaluate_button.on_click(comprehensive_evaluation)

display(
    widgets.VBox(
        children=[
            widgets.HTML("<h3>全体性能評価</h3>"),
            evaluation_samples_widget,
            evaluate_button,
            evaluation_output,
        ]
    )
)

VBox(children=(HTML(value='<h3>全体性能評価</h3>'), IntSlider(value=9, description='評価サンプル数:', max=30, min=3, step=3…

## 使用方法

1. **モデル選択**: ドロップダウンメニューから評価したいモデルを選択
2. **ハイパーパラメータ設定**: 自動生成されるウィジェットでパラメータを調整
3. **学習実行**: 「学習実行」ボタンで新しいモデルを訓練（オプション）
4. **データ読み込み**: データセットパスを指定して読み込み
5. **軌道生成**: チェックポイントを選択して軌道生成・可視化
6. **全体評価**: 評価サンプル数（3〜30）をスライダーで指定して包括的な性能評価を実行

## 機能説明

- **インタラクティブUI**: ipywidgetsによる直感的な操作
- **モデル汎用性**: 設定ファイルベースで任意のモデルに対応
- **リアルタイム可視化**: 生成軌道と実軌道の重ね合わせ表示
- **詳細評価**: MSE、終点誤差、軌道長、ジャーク、多様性など多角的評価
- **バッチ処理**: 大量サンプルでの効率的な評価