In [1]:
# matplotlib設定
import logging
import matplotlib

# フォント警告抑制
logging.getLogger('matplotlib.font_manager').setLevel(logging.ERROR)

# DejaVu Sansフォント設定
matplotlib.rcParams['font.family'] = 'DejaVu Sans'
matplotlib.rcParams['font.sans-serif'] = ['DejaVu Sans', 'Ubuntu']

import matplotlib.pyplot as plt
%matplotlib inline

print("✅ matplotlib設定完了")

✅ matplotlib設定完了


# MLOps実験実行ノートブック

config駆動のMLOpsパイプライン実験を実行します。

## セル構成
1. **Import** - 必要なライブラリとモジュールのインポート
2. **データ読み込み・分割** - CSVデータの読み込みとtrain/test分割
3. **MLflow実行** - パイプライン構築、学習、評価、記録
4. **予測結果の確認と活用** - 予測結果DataFrameの確認と後続分析

## 1. Import

In [2]:
# 基本ライブラリ
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
import mlflow
from omegaconf import OmegaConf
import warnings
warnings.filterwarnings('ignore')

# データ処理ユーティリティ
from src.utils.data_utils import get_dataset_name, detect_task_type, load_csv_data
from src.utils.cv_utils import create_cv_strategy

# components機能インポート
from src.mlops.components.pipeline import create_pipeline
from src.mlops.components.visualization import create_visualizations
from src.mlops.components.optimization import OptunaOptimizer
from src.mlops.components.artifacts import (
    save_model_artifacts, log_experiment_metrics, 
    setup_mlflow_experiment, set_mlflow_tags, 
    log_config_parameters, log_runtime_parameters,
    create_prediction_dataframe, save_prediction_results
)

print("✅ ライブラリインポート完了")

✅ ライブラリインポート完了


## 2. Config読み込み・データ分割

In [3]:
# Config読み込み（Hydraの代わりにOmegaConfで直接読み込み）
cfg = OmegaConf.load("config/config.yaml")

# pipelines設定を読み込んでマージ
pipeline_config = OmegaConf.load(f"config/pipelines/{cfg.defaults[2].pipelines}.yaml")
cfg = OmegaConf.merge(cfg, pipeline_config)

# models設定を読み込んでマージ
model_config = OmegaConf.load(f"config/models/classification/{cfg.defaults[1]['models/classification']}.yaml")
cfg = OmegaConf.merge(cfg, model_config)

# notebookディレクトリからの相対パス修正
cfg.data.file_path = f"{cfg.data.file_path}"

print("📋 Config読み込み完了")
print(f"  - Pipeline: {cfg.defaults[2].pipelines}")
print(f"  - Model: {cfg.defaults[1]['models/classification']}")
print(f"  - CV Strategy: {cfg.evaluation.cv_strategy}")
print(f"  - Optuna: {'有効' if cfg.optuna.enabled else '無効'}")

📋 Config読み込み完了
  - Pipeline: universal_features
  - Model: lightgbm
  - CV Strategy: {'module': 'sklearn.model_selection', 'class': 'StratifiedKFold', 'params': {'n_splits': 5, 'shuffle': True, 'random_state': '${globals.random_state}'}}
  - Optuna: 有効


In [4]:
# データ読み込み
print("📊 データ読み込み開始")
df, feature_cols, target_names = load_csv_data(cfg)

# データ分割
X = df[feature_cols]
y = df[cfg.data.target_column]

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=cfg.data.test_size,
    random_state=cfg.data.random_state
)

# タスクタイプ判定
task_type = detect_task_type(y)

print(f"✅ データ準備完了")
print(f"  - データセット: {cfg.data.file_path}")
print(f"  - データ形状: {df.shape}")
print(f"  - 特徴量数: {len(feature_cols)}")
print(f"  - タスクタイプ: {task_type}")
print(f"  - Train/Test: {len(X_train)}/{len(X_test)}")
print(f"  - クラス数: {len(target_names) if target_names is not None else 'N/A'}")

📊 データ読み込み開始
📊 CSV: _data/raw/wine_classification.csv - Shape: (178, 15) - Features: 13 - Classes: 3
✅ データ準備完了
  - データセット: _data/raw/wine_classification.csv
  - データ形状: (178, 15)
  - 特徴量数: 13
  - タスクタイプ: classification
  - Train/Test: 142/36
  - クラス数: 3


## 3. MLflow実験実行

In [5]:
# MLflow実験セットアップ
setup_mlflow_experiment(cfg)

# 既存runがある場合は終了
if mlflow.active_run():
    mlflow.end_run()

# カスタムRun名設定（オプション）
run_name = getattr(cfg.mlflow, 'run_id', None)

print(f"🚀 MLflow実験開始")
print(f"  - Experiment: {cfg.mlflow.experiment_name}")
print(f"  - Run name: {run_name if run_name else '自動生成'}")

# 予測結果DataFrame保存用変数
df_predictions = None

with mlflow.start_run(run_name=run_name) as run:
    print(f"  - Run ID: {run.info.run_id[:8]}")
    
    # タグ設定（run開始後）
    set_mlflow_tags(cfg)
    
    # Optuna最適化（有効な場合）
    if cfg.optuna.enabled:
        print(f"\n🎯 Optuna最適化開始")
        optimizer = OptunaOptimizer(cfg, X_train, y_train, task_type)
        best_params, best_score = optimizer.optimize()
        print(f"🎯 Optuna best_params: {best_params}")
        print(f"  ✅ 最適化完了")
    else:
        best_params = {}
        best_score = 0.0
        print(f"⚠️ Optuna無効: best_params = {best_params}")
        print("  ⚠️ Optuna最適化はスキップ")
    
    # 最適化されたパイプライン構築（best_paramsを反映）
    passed_params = best_params if best_params else None
    print(f"📦 create_pipeline呼び出し: best_params={passed_params}")
    print(f"\n🔧 パイプライン構築")
    best_pipeline = create_pipeline(cfg, best_params=passed_params)
    print(f"  - ステップ数: {len(best_pipeline.steps)}")
    for step_name, step_obj in best_pipeline.steps:
        print(f"    - {step_name}: {type(step_obj).__name__}")
    
    # パイプライン学習
    print(f"\n📈 モデル学習")
    best_pipeline.fit(X_train, y_train)
    print(f"  ✅ 学習完了")
    
    # 実行時パラメータ記録
    log_runtime_parameters(best_pipeline, cfg, best_params)
    
    # テストデータ予測（1回のみ実行）
    print(f"\n📊 テストデータ予測")
    y_pred = best_pipeline.predict(X_test)
    print(f"  ✅ 予測完了: {len(y_pred)}件")
    
    # Optuna最適化時はCV評価済み、未実行時のみCV実行
    if not cfg.optuna.enabled:
        # クロスバリデーション評価（Optuna未使用時のみ）
        print(f"\n🔄 クロスバリデーション評価")
        if task_type == "classification":
            scoring = cfg.optuna.scoring.classification
        else:
            scoring = cfg.optuna.scoring.regression
        
        cv_strategy = create_cv_strategy(cfg)
        print(f"  - CV戦略: {cfg.evaluation.cv_strategy['class']} (n_splits={cfg.evaluation.cv_strategy.params.n_splits})")
        print(f"  - 評価指標: {scoring}")
        
        cv_scores = cross_val_score(
            best_pipeline, X_train, y_train,
            cv=cv_strategy,
            scoring=scoring
        )
        print(f"  - CVスコア: {cv_scores.mean():.3f} ± {cv_scores.std():.3f}")
    else:
        # Optuna使用時は最適化結果を使用
        cv_scores = np.array([best_score] * 5)  # best_scoreを5foldに展開（numpy配列で互換性維持）
        print(f"\n🔄 CV評価をスキップ（Optuna最適化済み: {best_score:.3f}）")
    
    # メトリクス記録
    print(f"\n📊 評価メトリクス計算")
    log_experiment_metrics(best_pipeline, X_train, y_train, X_test, y_test, task_type, cv_scores, y_pred=y_pred)
    
    # 予測結果DataFrame作成と保存
    print(f"\n📊 予測結果DataFrame作成")
    df_predictions = create_prediction_dataframe(best_pipeline, X_test, y_test, task_type, y_pred=y_pred)
    save_prediction_results(df_predictions, cfg)
    
    # 予測結果表示（既存の予測結果を使用）
    if task_type == "classification":
        from sklearn.metrics import accuracy_score, f1_score
        test_accuracy = accuracy_score(y_test, y_pred)
        test_f1 = f1_score(y_test, y_pred, average='weighted')
        print(f"  - Test Accuracy: {test_accuracy:.3f}")
        print(f"  - Test F1 Score: {test_f1:.3f}")
    else:
        from sklearn.metrics import mean_squared_error, r2_score
        test_mse = mean_squared_error(y_test, y_pred)
        test_r2 = r2_score(y_test, y_pred)
        print(f"  - Test MSE: {test_mse:.3f}")
        print(f"  - Test R²: {test_r2:.3f}")
    
    # 可視化生成（config駆動）
    if cfg.visualization.enabled:
        print(f"\n📈 可視化生成")
        target_names_str = [str(name) for name in target_names]
        create_visualizations(
            best_pipeline, X_train, y_train, X_test, y_test,
            target_names_str, cfg.visualization.plots, cfg, task_type
        )
        print(f"  ✅ 可視化完了: {', '.join(cfg.visualization.plots)}")
    
    # モデル・アーティファクト保存
    print(f"\n💾 アーティファクト保存")
    save_model_artifacts(best_pipeline, feature_cols, target_names, cfg)
    print(f"  ✅ モデル・メタデータ保存完了")
    
    print(f"\n✅ MLOps実験完了")
    print(f"  - Run ID: {run.info.run_id}")
    print(f"  - MLflow UI: http://localhost:5000")

🚀 MLflow実験開始
  - Experiment: config_driven_mlops
  - Run name: stratified_cv_test


[I 2025-09-22 13:29:34,634] A new study created in memory with name: ml_optimization


  - Run ID: 9f3511ec

🎯 Optuna最適化開始
🎯 Optuna最適化開始 | 5 trials | maximize
    [pipeline] trial mode: params={'n_estimators': 33, 'learning_rate': 0.16631282283255264, 'max_depth': 5, 'num_leaves': 6, 'min_child_samples': 17}
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
📊 統計的特徴量選択: ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000079 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 417
[LightGBM] [Info] Number of data points in the train set: 138, number of used features: 10
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
📊 統計的特徴量選

[I 2025-09-22 13:29:38,322] Trial 0 finished with value: 0.9789064525044612 and parameters: {'n_estimators': 33, 'learning_rate': 0.16631282283255264, 'max_depth': 5, 'num_leaves': 6, 'min_child_samples': 17}. Best is trial 0 with value: 0.9789064525044612.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000056 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 415
[LightGBM] [Info] Number of data points in the train set: 135, number of used features: 10
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
📊 統計的特徴量選択: ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000061 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 402
[LightGBM] [Info] Number of data points in the train set: 135, number of used features: 10
[LightGBM] [Info] Start training from score 

[I 2025-09-22 13:30:02,260] Trial 1 finished with value: 0.9647962270207537 and parameters: {'n_estimators': 54, 'learning_rate': 0.05175619612288578, 'max_depth': 4, 'num_leaves': 9, 'min_child_samples': 14}. Best is trial 0 with value: 0.9789064525044612.



    [pipeline] trial mode: params={'n_estimators': 68, 'learning_rate': 0.1779877847278325, 'max_depth': 3, 'num_leaves': 6, 'min_child_samples': 6}
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
🗑️ 指定カラム削除: []📊 統計的特徴量選択: ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']

📊 分類タスクを検出: f_classif使用
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
📊 統計的特徴量選択: ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000038 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
📊 統計的特徴量選択: ['alcohol', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280

[I 2025-09-22 13:30:05,596] Trial 2 finished with value: 0.9718710569527266 and parameters: {'n_estimators': 68, 'learning_rate': 0.1779877847278325, 'max_depth': 3, 'num_leaves': 6, 'min_child_samples': 6}. Best is trial 0 with value: 0.9789064525044612.


🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
📊 統計的特徴量選択: ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000051 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 415
[LightGBM] [Info] Number of data points in the train set: 135, number of used features: 10
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
    [pipeline] trial mode: params={'n_estimators': 31, 'learning_rate': 0.1821256108238669, 'max_depth': 5, 'num_leaves': 9, 'min_child_samples': 19}
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
🗑️ 指定カラム削除: []
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
📊 分類タスクを検出: f_classif使用
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
📊 統計的特徴量選択: ['alcohol', 'mali

[I 2025-09-22 13:30:17,380] Trial 3 finished with value: 0.9719569860612122 and parameters: {'n_estimators': 31, 'learning_rate': 0.1821256108238669, 'max_depth': 5, 'num_leaves': 9, 'min_child_samples': 19}. Best is trial 0 with value: 0.9789064525044612.



    [pipeline] trial mode: params={'n_estimators': 69, 'learning_rate': 0.15205124053142588, 'max_depth': 5, 'num_leaves': 8, 'min_child_samples': 16}
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
📊 統計的特徴量選択: ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
📊 統計的特徴量選択: ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
🗑️ 指定カラム削除: []
📊 統計的特徴量選択: ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
📊 分類タスクを検出: f_classif使用
📊 統計的特徴量選択: ['alcohol', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyani

[I 2025-09-22 13:30:37,940] Trial 4 finished with value: 0.9790819413064682 and parameters: {'n_estimators': 69, 'learning_rate': 0.15205124053142588, 'max_depth': 5, 'num_leaves': 8, 'min_child_samples': 16}. Best is trial 4 with value: 0.9790819413064682.







🎯 Optuna最適化完了 | Best: 0.979 | Params: {'n_estimators': 69, 'learning_rate': 0.15205124053142588, 'max_depth': 5, 'num_leaves': 8, 'min_child_samples': 16}
🎯 Optuna best_params: {'n_estimators': 69, 'learning_rate': 0.15205124053142588, 'max_depth': 5, 'num_leaves': 8, 'min_child_samples': 16}
  ✅ 最適化完了
📦 create_pipeline呼び出し: best_params={'n_estimators': 69, 'learning_rate': 0.15205124053142588, 'max_depth': 5, 'num_leaves': 8, 'min_child_samples': 16}

🔧 パイプライン構築
    [pipeline] best_params mode: params={'n_estimators': 69, 'learning_rate': 0.15205124053142588, 'max_depth': 5, 'num_leaves': 8, 'min_child_samples': 16}
  - ステップ数: 5
    - sampler: SMOTE
    - custom_drop: CustomColumnDropper
    - smart_selection: SmartFeatureSelector
    - scaler: StandardScaler
    - classifier: LGBMClassifier

📈 モデル学習
🗑️ 指定カラム削除: []
📊 分類タスクを検出: f_classif使用
📊 統計的特徴量選択: ['alcohol', 'malic_acid', 'alcalinity_of_ash', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'color_intensity', 'hue', 'od

ExactExplainer explainer: 37it [00:13,  2.08it/s]                        


⚠️ 可視化エラー discrimination_threshold: multiclass format is not supported
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 11, number of used features: 0
[LightGBM] [Info] Start training from score -1.011601
[LightGBM] [Info] Start training from score -1.299283
[LightGBM] [Info] Start training from score -1.011601
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000031 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 130
[LightGBM] [Info] Number of data points in the train set: 36, number of used features: 10
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -0.944462
[LightGBM] [Info] Start training from score -1.280934
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000034 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[Ligh

## 4. 予測結果の確認と活用

予測結果DataFrameを確認し、後続の分析に活用できます。

In [6]:
# 予測結果DataFrameの確認
if df_predictions is not None:
    print("📊 予測結果DataFrame情報")
    print(f"  - データ件数: {len(df_predictions)}件")
    print(f"  - カラム数: {len(df_predictions.columns)}列")
    
    # 分類タスクの場合のカラム情報
    if task_type == "classification":
        proba_cols = [col for col in df_predictions.columns if col.startswith('proba_class_')]
        print(f"  - 確率カラム: {proba_cols}")
        print(f"  - 信頼度カラム: prediction_confidence")
    
    print("\n📋 予測結果サンプル（先頭5件）:")
    display(df_predictions.head())
else:
    print("⚠️ 予測結果DataFrameがまだ作成されていません。上のセルを実行してください。")

📊 予測結果DataFrame情報
  - データ件数: 36件
  - カラム数: 29列
  - 確率カラム: ['proba_class_0', 'proba_class_1', 'proba_class_2']
  - 信頼度カラム: prediction_confidence

📋 予測結果サンプル（先頭5件）:


Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,...,generated_feature_6,generated_feature_7,generated_feature_8,generated_feature_9,y_true,y_pred,proba_class_0,proba_class_1,proba_class_2,prediction_confidence
19,13.64,3.1,2.56,15.2,116.0,2.7,3.03,0.17,1.66,5.1,...,-0.127787,0.112557,1.108291,0.295364,0,0,0.999999,7.173377e-07,6.81579e-07,0.999999
45,14.21,4.04,2.44,18.9,111.0,2.85,2.65,0.3,1.25,5.24,...,-0.068865,-0.269855,1.067814,1.078164,0,0,0.999995,2.484662e-06,2.63955e-06,0.999995
140,12.93,2.81,2.7,21.0,96.0,1.54,0.5,0.53,0.75,4.6,...,-0.338222,-0.694757,-0.308399,-0.520748,2,2,9e-06,2.576025e-05,0.9999652,0.999965
30,13.73,1.5,2.7,22.5,101.0,3.0,3.25,0.29,2.38,5.7,...,0.124735,1.089832,0.231292,1.761033,0,0,0.999887,0.0001014306,1.199576e-05,0.999887
67,12.37,1.17,1.92,19.6,78.0,2.11,2.0,0.27,1.04,4.68,...,-0.304553,0.792401,1.270199,-0.820544,1,1,1.2e-05,0.9999797,8.581311e-06,0.99998


In [7]:
# 予測精度の詳細分析（分類タスクの例）
if df_predictions is not None and task_type == "classification":
    # 予測の信頼度分布
    import matplotlib.pyplot as plt
    
    plt.figure(figsize=(10, 4))
    
    # 信頼度ヒストグラム
    plt.subplot(1, 2, 1)
    plt.hist(df_predictions['prediction_confidence'], bins=20, edgecolor='black')
    plt.xlabel('予測信頼度')
    plt.ylabel('件数')
    plt.title('予測信頼度の分布')
    
    # 正誤別の信頼度
    plt.subplot(1, 2, 2)
    df_predictions['is_correct'] = df_predictions['y_true'] == df_predictions['y_pred']
    correct_conf = df_predictions[df_predictions['is_correct']]['prediction_confidence']
    incorrect_conf = df_predictions[~df_predictions['is_correct']]['prediction_confidence']
    
    plt.boxplot([correct_conf, incorrect_conf], labels=['正解', '不正解'])
    plt.ylabel('予測信頼度')
    plt.title('正誤別の予測信頼度')
    
    plt.tight_layout()
    plt.show()
    
    # 統計情報
    print("📊 予測信頼度の統計:")
    print(f"  - 全体平均: {df_predictions['prediction_confidence'].mean():.3f}")
    print(f"  - 正解時平均: {correct_conf.mean():.3f}")
    print(f"  - 不正解時平均: {incorrect_conf.mean():.3f}")

📊 予測信頼度の統計:
  - 全体平均: 0.993
  - 正解時平均: 0.998
  - 不正解時平均: 0.800


## 追加: データ分析とエクスポート

予測結果DataFrameはそのまま後続の分析に利用可能です。

In [8]:
# 予測結果をローカルCSVとして保存（必要な場合）
if df_predictions is not None:
    output_path = "test_predictions_local.csv"
    df_predictions.to_csv(output_path, index=False)
    print(f"✅ 予測結果をローカル保存: {output_path}")
    
    # 必要なカラムのみ抽出した例
    df_essential = df_predictions[['y_true', 'y_pred', 'prediction_confidence']]
    print(f"\n📋 エッセンシャル予測結果（y_true, y_pred, confidence）:")
    display(df_essential.head())

✅ 予測結果をローカル保存: test_predictions_local.csv

📋 エッセンシャル予測結果（y_true, y_pred, confidence）:


Unnamed: 0,y_true,y_pred,prediction_confidence
19,0,0,0.999999
45,0,0,0.999995
140,2,2,0.999965
30,0,0,0.999887
67,1,1,0.99998


## MLflow UIの起動

実験結果を確認するには、ターミナルで以下を実行：
```bash
mlflow ui
```

その後、ブラウザで http://localhost:5000 にアクセス