# Experiment: CAFPO Phase B v3（foldなし単発 split）

## 目的
- 既存v2（rolling fold）を、単発の train/test split に置き換える。
- `PPO/DDPG × 3報酬` を同一 test 期間で比較する。
- `best_method` 1条件に対して SHAP で因子寄与分析を実施する。

## split 定義
- Train: 2001-01 〜 2020-12
- Test: 2021-01 〜 2025-12

## 論文との差分（継続）
- CRSP / 94企業特性 / FF5 は未使用（手元データ代替）。
- 制約は Long-only（`w>=0`, `sum(w)=1`）。
- Value-weight は時価総額データ不在のため未実装。
- 取引コスト・スリッページは 0 固定。


In [None]:
import os
import random
from datetime import datetime
from itertools import product
from pathlib import Path

import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from gymnasium import spaces
from scipy.optimize import minimize
from stable_baselines3 import DDPG, PPO
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.vec_env import DummyVecEnv

# Project layout: input CSVs are read from DATA_DIR, all artifacts are written to OUTPUT_DIR.
# NOTE(review): the root is an absolute, machine-specific path; the notebook only runs
# unchanged on a machine where this directory exists.
PROJECT_ROOT = Path('/Users/kencharoff/workspace/projects/rl/p03_deep_reinforcement_learning_in_factor_investment')
DATA_DIR = PROJECT_ROOT / 'data'
OUTPUT_DIR = PROJECT_ROOT / 'outputs'
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Avoid the Matplotlib cache warning by pointing MPLCONFIGDIR at a writable folder.
# NOTE(review): matplotlib.pyplot is already imported above this line, so this presumably
# only helps libraries that resolve the config dir later -- confirm the intended ordering.
MPL_DIR = OUTPUT_DIR / 'tmp_mpl'
MPL_DIR.mkdir(parents=True, exist_ok=True)
os.environ['MPLCONFIGDIR'] = str(MPL_DIR)

import shap  # noqa: E402

SEED = 42


def set_seed(seed: int = 42) -> None:
    """Seed Python's `random`, NumPy's global RNG and PyTorch with the same value.

    Args:
        seed: Integer seed applied to all three generators.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)


# Seed once at import time so every later cell starts from the same RNG state.
set_seed(SEED)

# Single source of truth for the experiment's hyper-parameters and date ranges.
CONFIG = {
    # Overall decision-date range and the single train/test split (month-end dates).
    'start_date': '2001-01-31',
    'end_date': '2025-12-31',
    'train_start': '2001-01-31',
    'train_end': '2020-12-31',
    'test_start': '2021-01-31',
    'test_end': '2025-12-31',
    'split_name': 'train_2001_2020_test_2021_2025',
    # Conditional autoencoder (read by train_cae).
    'num_factors': 4,
    'hidden_dim': 32,
    'cae_lr': 1e-3,
    'cae_epochs': 200,
    # Number of past months of factors stacked into each RL state.
    'lookback': 12,
    # Reward shaping: eta is the moving-average rate and eps the numerical floor
    # passed to the differential Sharpe / downside-deviation rewards.
    'reward_eta': 0.01,
    'reward_eps': 1e-8,
    # Experiment grid: every algo is trained with every reward mode.
    'reward_modes': ['log_return', 'diff_sharpe', 'diff_ddr'],
    'algos': ['PPO', 'DDPG'],
    # PPO settings forwarded to stable_baselines3.PPO.
    'ppo_learning_rate': 3e-4,
    'ppo_gamma': 0.99,
    'ppo_n_steps': 96,
    'ppo_batch_size': 32,
    'ppo_ent_coef': 0.0,
    'ppo_total_timesteps': 20_000,
    # DDPG settings forwarded to stable_baselines3.DDPG (noise_sigma feeds NormalActionNoise).
    'ddpg_learning_rate': 1e-3,
    'ddpg_buffer_size': 50_000,
    'ddpg_learning_starts': 1_000,
    'ddpg_batch_size': 64,
    'ddpg_tau': 0.005,
    'ddpg_gamma': 0.99,
    'ddpg_train_freq': (1, 'step'),
    'ddpg_gradient_steps': 1,
    'ddpg_total_timesteps': 20_000,
    'ddpg_noise_sigma': 0.1,
    # Trailing window (months) for the historical Markowitz baseline.
    'markowitz_window': 36,
    # NOTE(review): cost_bps is not read by any code visible in this part of the file.
    'cost_bps': 0.0,
    # SHAP sample sizes (background / explained states / KernelExplainer nsamples).
    'shap_background': 128,
    'shap_explain': 120,
    'shap_kernel_nsamples': 100,
    # NOTE(review): the perm_*, ig_* and interp_methods keys are presumably consumed by
    # interpretation code further down the notebook -- not visible here, confirm.
    'perm_repeats': 10,
    'perm_seed': 42,
    'ig_steps': 64,
    'ig_baseline_mode': 'train_mean',
    'interp_methods': ['shap', 'permutation', 'integrated_gradients'],
}

# Log library versions so saved outputs can be traced back to the environment.
print('CONFIG loaded')
print('torch:', torch.__version__)
print('pandas:', pd.__version__)
print('shap:', shap.__version__)


In [None]:
# Load the three daily input tables; 'Date' is parsed to datetime in each.
sector_daily = pd.read_csv(DATA_DIR / 'sector_returns.csv', parse_dates=['Date'])
macro_daily = pd.read_csv(DATA_DIR / 'macro.csv', parse_dates=['Date'])
spx_daily = pd.read_csv(DATA_DIR / 'SPX.csv', parse_dates=['Date'])

# Minimum column sets each file must provide (extra columns are tolerated by issubset).
expected_sector_cols = {'Date', 'XLB', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU', 'XLV', 'XLY'}
expected_macro_cols = {'Date', '^VIX', 'DX-Y.NYB', 'SPRED'}
expected_spx_cols = {'Date', '^GSPC'}

# Fail fast on schema drift before any resampling happens.
assert expected_sector_cols.issubset(sector_daily.columns), 'sector_returns.csv columns mismatch'
assert expected_macro_cols.issubset(macro_daily.columns), 'macro.csv columns mismatch'
assert expected_spx_cols.issubset(spx_daily.columns), 'SPX.csv columns mismatch'

print('sector_daily shape:', sector_daily.shape)
print('macro_daily shape:', macro_daily.shape)
print('spx_daily shape:', spx_daily.shape)


In [None]:
# Index every daily table by date in chronological order.
sector_daily = sector_daily.sort_values('Date').set_index('Date')
macro_daily = macro_daily.sort_values('Date').set_index('Date')
spx_daily = spx_daily.sort_values('Date').set_index('Date')

# Compound daily sector returns into month-end returns.
full_sector_monthly = (1.0 + sector_daily).resample('ME').prod() - 1.0
# Month-end macro levels, then shifted one month so a decision date only sees the prior month.
full_macro_monthly = macro_daily.resample('ME').last()
full_macro_lag1 = full_macro_monthly.shift(1)
# Monthly S&P 500 return from month-end levels.
# NOTE(review): spx_monthly_ret is not referenced again in the visible part of the file.
full_spx_monthly_ret = spx_daily.resample('ME').last().pct_change().rename(columns={'^GSPC': 'SPX_RET'})

# Decision dates: month-ends inside the configured [start_date, end_date] range.
decision_index = full_sector_monthly.index[
    (full_sector_monthly.index >= pd.Timestamp(CONFIG['start_date']))
    & (full_sector_monthly.index <= pd.Timestamp(CONFIG['end_date']))
]

assets = full_sector_monthly.columns.tolist()

sector_monthly = full_sector_monthly.reindex(decision_index)
# Target for decision date t is the return realized in month t+1 (shift(-1)).
target_monthly = full_sector_monthly.shift(-1).reindex(decision_index)
# NOTE(review): bfill() fills leading gaps with later observations, i.e. with information
# from after the decision date -- confirm the first rows are never actually missing.
macro_lag1 = full_macro_lag1.reindex(decision_index).ffill().bfill()
spx_monthly_ret = full_spx_monthly_ret.reindex(decision_index).ffill().bfill()

assert sector_monthly.index.is_monotonic_increasing
assert sector_monthly.index.is_unique
# The last decision month needs one more month of data after end_date to have a target.
assert target_monthly.notna().all().all(), 'target_monthly contains NaN'

print('decision months:', len(decision_index))
print('decision start/end:', decision_index.min().date(), decision_index.max().date())
print('assets:', assets)


In [None]:
def rolling_prod_return(df: pd.DataFrame, window: int) -> pd.DataFrame:
    """Return the compounded return over a trailing window for every column.

    Windows shorter than `window` at the start of the sample are still
    evaluated (min_periods=1), so the output has no leading NaN from warm-up.
    """
    gross = df.add(1.0)
    compounded = gross.rolling(window=window, min_periods=1).apply(np.prod, raw=True)
    return compounded.sub(1.0)


def downside_deviation(df: pd.DataFrame, window: int) -> pd.DataFrame:
    """Return the trailing root-mean-square of negative returns per column.

    Positive returns are replaced by zero before squaring, and the rolling
    mean uses min_periods=1 so early rows are computed on partial windows.
    """
    losses = df.clip(upper=0.0)
    mean_squared_loss = losses.pow(2).rolling(window=window, min_periods=1).mean()
    return np.sqrt(mean_squared_loss)


def cs_rank_normalize(df: pd.DataFrame) -> pd.DataFrame:
    """Rank-normalize each row (cross-section) into the interval (-1, 1].

    Per row: NaNs are replaced by the row median, values are converted to
    average percentile ranks and mapped with 2 * rank - 1. Rows that are
    entirely NaN, or whose values are all (numerically) equal, become zeros.
    """
    values = df.to_numpy(dtype=np.float64)
    scaled = np.zeros_like(values, dtype=np.float64)

    for pos, raw_row in enumerate(values):
        missing = np.isnan(raw_row)
        if missing.all():
            continue  # no information in this cross-section: leave zeros
        filled = np.where(missing, np.nanmedian(raw_row), raw_row)
        if np.allclose(filled, filled[0]):
            continue  # constant cross-section carries no ranking: leave zeros
        pct_rank = pd.Series(filled).rank(method='average', pct=True).to_numpy(dtype=np.float64)
        scaled[pos] = 2.0 * pct_rank - 1.0

    return pd.DataFrame(scaled, index=df.index, columns=df.columns)


def _expanding_rank_normalize(values: np.ndarray) -> np.ndarray:
    """Causal time-series rank of a single series, scaled like cs_rank_normalize.

    For each position t the value is ranked (average rank, as a percentile)
    against the observations up to and including t, then mapped with
    2 * pct - 1. Only past and current data are used, so no look-ahead is
    introduced. Non-finite entries map to 0.0.
    """
    values = np.asarray(values, dtype=np.float64)
    out = np.zeros(len(values), dtype=np.float64)
    for t, v in enumerate(values):
        if not np.isfinite(v):
            continue
        hist = values[: t + 1]
        n_less = float(np.sum(hist < v))
        n_equal = float(np.sum(hist == v))
        avg_rank = n_less + 0.5 * (n_equal + 1.0)
        out[t] = 2.0 * (avg_rank / len(hist)) - 1.0
    return out


# Asset-level characteristics built from the monthly sector returns.
mom_1m = sector_monthly.copy()
mom_3m = rolling_prod_return(sector_monthly, 3)
mom_6m = rolling_prod_return(sector_monthly, 6)
# A one-observation window has an undefined std (NaN); treat it as zero volatility.
vol_3m = sector_monthly.rolling(window=3, min_periods=1).std().fillna(0.0)
vol_6m = sector_monthly.rolling(window=6, min_periods=1).std().fillna(0.0)
downside_6m = downside_deviation(sector_monthly, 6).fillna(0.0)

# Macro variables are broadcast to every asset so they fit the [T, N, P] layout.
# Each broadcast row is constant across assets, so cs_rank_normalize would map it to 0.0
# (its "all values equal" branch) and the macro features would carry no information.
# They are therefore normalized along time instead, with a causal expanding rank.
macro_frames = {}
macro_norm_frames = {}
for raw_col, feat_name in [
    ('^VIX', 'macro_vix_lag1'),
    ('DX-Y.NYB', 'macro_dxy_lag1'),
    ('SPRED', 'macro_spread_lag1'),
]:
    vals = macro_lag1[raw_col].to_numpy(dtype=np.float64)
    mat = np.repeat(vals[:, None], len(assets), axis=1)
    macro_frames[feat_name] = pd.DataFrame(mat, index=decision_index, columns=assets)
    norm_mat = np.repeat(_expanding_rank_normalize(vals)[:, None], len(assets), axis=1)
    macro_norm_frames[feat_name] = pd.DataFrame(norm_mat, index=decision_index, columns=assets)

raw_feature_frames = {
    'mom_1m': mom_1m,
    'mom_3m': mom_3m,
    'mom_6m': mom_6m,
    'vol_3m': vol_3m,
    'vol_6m': vol_6m,
    'downside_6m': downside_6m,
    **macro_frames,
}

# Asset-level features: cross-sectional rank. Macro features: time-series rank (see above).
norm_feature_frames = {
    name: macro_norm_frames[name] if name in macro_norm_frames else cs_rank_normalize(frame)
    for name, frame in raw_feature_frames.items()
}

# Fixes the order of the last tensor axis (P).
feature_order = [
    'mom_1m',
    'mom_3m',
    'mom_6m',
    'vol_3m',
    'vol_6m',
    'downside_6m',
    'macro_vix_lag1',
    'macro_dxy_lag1',
    'macro_spread_lag1',
]

# feature_tensor is [T, N, P]: decision dates x assets x features.
feature_tensor = np.stack(
    [norm_feature_frames[name].to_numpy(dtype=np.float32) for name in feature_order],
    axis=2,
)
returns_tensor = sector_monthly.to_numpy(dtype=np.float32)
target_tensor = target_monthly.to_numpy(dtype=np.float32)
# Raw (un-normalized) lagged macro levels appended to each RL state.
macro_state = macro_lag1[['^VIX', 'DX-Y.NYB', 'SPRED']].copy().ffill().bfill().to_numpy(dtype=np.float32)

assert np.isfinite(feature_tensor).all(), 'feature_tensor has non-finite values'
assert np.isfinite(returns_tensor).all(), 'returns_tensor has non-finite values'
assert np.isfinite(target_tensor).all(), 'target_tensor has non-finite values'
assert np.isfinite(macro_state).all(), 'macro_state has non-finite values'

# Date of the month in which each decision's target return is realized.
next_dates = decision_index + pd.offsets.MonthEnd(1)
assert (next_dates > decision_index).all(), 'date alignment failed'

print('feature_tensor [T,N,P]:', feature_tensor.shape)
print('returns_tensor [T,N]:', returns_tensor.shape)
print('target_tensor [T,N]:', target_tensor.shape)
print('macro_state [T,3]:', macro_state.shape)


In [None]:
class ConditionalAutoencoder(nn.Module):
    """Conditional autoencoder: characteristic-driven betas times return-driven factors.

    `covariates_net` maps each asset's feature vector to `num_factors` betas;
    `factor_net` is a single linear layer mapping the cross-section of returns
    to `num_factors` latent factors. The reconstruction of each asset's return
    is the inner product of its betas with the period's factors.
    """

    def __init__(self, num_features: int, num_assets: int, hidden_dim: int, num_factors: int):
        super().__init__()
        self.num_assets = num_assets
        self.num_factors = num_factors
        beta_layers = [
            nn.Linear(num_features, hidden_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_dim, num_factors),
        ]
        self.covariates_net = nn.Sequential(*beta_layers)
        self.factor_net = nn.Linear(num_assets, num_factors)

    def forward(self, z: torch.Tensor, r: torch.Tensor):
        """Reconstruct returns.

        Args:
            z: Features with three axes (periods, assets, features).
            r: Returns with axes (periods, assets).

        Returns:
            Tuple of (reconstructed returns, latent factors).
        """
        n_periods, n_assets, n_features = z.shape
        # The beta network works on one asset-period row at a time.
        flat_features = z.reshape(n_periods * n_assets, n_features)
        beta = self.covariates_net(flat_features).reshape(n_periods, n_assets, self.num_factors)
        factors = self.factor_net(r)
        recon = torch.einsum('tnk,tk->tn', beta, factors)
        return recon, factors

    @torch.no_grad()
    def encode_factors(self, r: torch.Tensor) -> torch.Tensor:
        """Return latent factors for the given returns without tracking gradients."""
        return self.factor_net(r)


def train_cae(z_train: np.ndarray, r_train: np.ndarray, cfg: dict):
    """Fit a ConditionalAutoencoder on the training window with full-batch Adam.

    Args:
        z_train: Feature array indexed as [period, asset, feature].
        r_train: Return array indexed as [period, asset]; also the reconstruction target.
        cfg: Must provide 'hidden_dim', 'num_factors', 'cae_lr' and 'cae_epochs'.

    Returns:
        Tuple of (trained model, list of per-epoch MSE losses).
    """
    # Re-seed so the weight initialization is reproducible across calls.
    set_seed(SEED)

    n_features = z_train.shape[2]
    n_assets = r_train.shape[1]
    model = ConditionalAutoencoder(
        num_features=n_features,
        num_assets=n_assets,
        hidden_dim=cfg['hidden_dim'],
        num_factors=cfg['num_factors'],
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg['cae_lr'])
    mse = nn.MSELoss()

    features = torch.tensor(z_train, dtype=torch.float32)
    returns = torch.tensor(r_train, dtype=torch.float32)

    loss_history = []
    model.train()
    epoch = 0
    while epoch < cfg['cae_epochs']:
        optimizer.zero_grad()
        reconstructed, _factors = model(features, returns)
        epoch_loss = mse(reconstructed, returns)
        epoch_loss.backward()
        optimizer.step()
        loss_history.append(float(epoch_loss.item()))
        epoch += 1

    return model, loss_history


@torch.no_grad()
def infer_factors(model: ConditionalAutoencoder, r_all: np.ndarray) -> np.ndarray:
    """Encode returns into latent factors with the model in eval mode.

    Args:
        model: Trained autoencoder; switched to eval mode as a side effect.
        r_all: Returns indexed as [period, asset].

    Returns:
        float32 array of latent factors, one row per period.
    """
    model.eval()
    returns = torch.tensor(r_all, dtype=torch.float32)
    latent = model.encode_factors(returns).cpu().numpy()
    return latent.astype(np.float32)


In [None]:
def softmax_weights(logits: np.ndarray) -> np.ndarray:
    """Map action logits to long-only portfolio weights that sum to one.

    The maximum logit is subtracted before exponentiating for numerical
    stability. If the normalizer is not a finite positive number (NaN or
    +inf logits, or all logits equal to -inf) the function falls back to
    equal weights instead of returning NaN weights.

    Args:
        logits: Array-like of unnormalized scores.

    Returns:
        float64 array of non-negative weights summing to 1 (empty for empty input).
    """
    logits = np.asarray(logits, dtype=np.float64)
    if logits.size == 0:
        # np.max would raise on an empty array; there is nothing to allocate.
        return logits.copy()

    # inf - inf and NaN propagation are expected here and handled by the fallback below.
    with np.errstate(invalid='ignore', over='ignore'):
        exp_v = np.exp(logits - np.max(logits))
        den = np.sum(exp_v)

    if not np.isfinite(den) or den <= 0:
        return np.ones_like(exp_v) / exp_v.size
    return (exp_v / den).astype(np.float64)


def build_state(factors: np.ndarray, macro: np.ndarray, idx: int, lookback: int) -> np.ndarray:
    """Build the flat observation for decision index `idx`.

    The state is the last `lookback` rows of `factors` (oldest first,
    flattened row by row) followed by the macro row at `idx`.

    Args:
        factors: Factor history, one row per decision date.
        macro: Macro variables, one row per decision date.
        idx: Decision index; must satisfy lookback - 1 <= idx < len(factors).
        lookback: Number of factor rows to include (>= 1).

    Returns:
        float32 vector of length lookback * factors.shape[1] + macro.shape[1].

    Raises:
        ValueError: If `lookback` is smaller than 1.
        IndexError: If `idx` does not leave a full lookback window inside `factors`.
    """
    if lookback < 1:
        raise ValueError(f'lookback must be >= 1, got {lookback}')
    start = idx - lookback + 1
    # A negative start would be read as "from the end" by the slice and silently
    # yield a short or empty window, i.e. a state of the wrong length.
    if start < 0 or idx >= len(factors):
        raise IndexError(
            f'idx={idx} does not allow a full lookback window of {lookback} '
            f'within {len(factors)} rows'
        )
    window = factors[start : idx + 1].reshape(-1)
    state = np.concatenate([window, macro[idx]], axis=0)
    return state.astype(np.float32)


def compute_log_return_reward(port_ret: float) -> float:
    """Return log(1 + port_ret), with the return floored at -0.999999.

    The floor keeps the logarithm's argument strictly positive.
    """
    floor = -0.999999
    bounded = floor if floor > port_ret else port_ret
    return float(np.log1p(bounded))


def compute_diff_sharpe_reward(port_ret: float, a_prev: float, b_prev: float, eta: float, eps: float):
    """Differential-Sharpe-style reward with exponential moving moments.

    `a_prev` and `b_prev` are the running estimates of the first and second
    moment of the portfolio return. The reward is
    (b_prev * delta_a - 0.5 * a_prev * delta_b) / (variance + eps) ** 1.5,
    where the deltas already include the factor `eta`, so the reward scales
    linearly with `eta`.

    Args:
        port_ret: Realized portfolio return for the step.
        a_prev: Previous moving average of returns.
        b_prev: Previous moving average of squared returns.
        eta: Moving-average adaptation rate.
        eps: Small constant added to the variance estimate.

    Returns:
        Tuple (reward, updated a, updated b), all Python floats.
    """
    delta_a = eta * (port_ret - a_prev)
    delta_b = eta * (port_ret * port_ret - b_prev)

    # b - a^2 can be negative for inconsistent inputs; a negative base raised to 1.5
    # would produce a complex number and make float() fail, so clamp at zero.
    variance = max(b_prev - a_prev * a_prev, 0.0)
    denom = (variance + eps) ** 1.5

    if denom > 0.0:
        reward = (b_prev * delta_a - 0.5 * a_prev * delta_b) / denom
    else:
        # Zero variance and zero eps: the ratio is undefined, give no signal.
        reward = 0.0

    a_new = a_prev + delta_a
    b_new = b_prev + delta_b
    return float(reward), float(a_new), float(b_new)


def compute_diff_ddr_reward(port_ret: float, a_prev: float, dd2_prev: float, eta: float, eps: float):
    """Differential downside-deviation-ratio reward with moving moments.

    `a_prev` is the running mean return and `dd2_prev` the running mean of
    squared negative returns. Positive returns are scored against the
    previous downside deviation; non-positive returns use the second branch,
    which also accounts for the change in downside deviation.

    Returns:
        Tuple (reward, updated a, updated dd2), all Python floats.
    """
    # Floor the squared deviation at eps so the square root is never zero.
    dd_prev = float(np.sqrt(max(dd2_prev, eps)))
    excess = port_ret - 0.5 * a_prev

    if port_ret > 0.0:
        reward = excess / dd_prev
    else:
        numerator = dd2_prev * excess - 0.5 * a_prev * (port_ret**2)
        reward = numerator / ((dd_prev**3) + eps)

    downside = min(port_ret, 0.0)
    a_new = a_prev + eta * (port_ret - a_prev)
    dd2_new = dd2_prev + eta * (downside ** 2 - dd2_prev)
    return float(reward), float(a_new), float(dd2_new)


class PortfolioEnv(gym.Env):
    """Monthly long-only allocation environment over pre-computed factors.

    Observation: the last `lookback` factor rows (flattened) plus the macro
    row of the current decision index. Action: one logit per asset, turned
    into weights with `softmax_weights`. Each step realizes the target
    return stored at the current index and moves one index forward; the
    episode ends once the pointer passes `end_idx`.
    """

    metadata = {'render_modes': []}

    def __init__(
        self,
        factors: np.ndarray,
        macro: np.ndarray,
        target_returns: np.ndarray,
        decision_dates: pd.DatetimeIndex,
        start_idx: int,
        end_idx: int,
        lookback: int,
        reward_mode: str,
        reward_eta: float,
        reward_eps: float,
    ):
        super().__init__()
        self.factors = factors
        self.macro = macro
        self.target_returns = target_returns
        self.decision_dates = decision_dates
        self.start_idx = start_idx
        self.end_idx = end_idx
        self.lookback = lookback
        self.reward_mode = reward_mode
        self.reward_eta = reward_eta
        self.reward_eps = reward_eps

        self.n_assets = target_returns.shape[1]
        self.obs_dim = lookback * factors.shape[1] + macro.shape[1]

        self.action_space = spaces.Box(low=-10.0, high=10.0, shape=(self.n_assets,), dtype=np.float32)
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.obs_dim,),
            dtype=np.float32,
        )

        # ptr stays None until reset() positions it at start_idx.
        self.ptr = None
        self._clear_episode_state()

    def _clear_episode_state(self) -> None:
        """Reset per-episode histories and the running reward moments."""
        self.weight_history = []
        self.return_history = []
        self.a_moment = 0.0
        self.b_moment = 1e-6
        self.dd2_moment = 1e-6

    def reset(self, seed=None, options=None):
        """Start a new episode at `start_idx` and return (observation, info)."""
        super().reset(seed=seed)
        self.ptr = self.start_idx
        self._clear_episode_state()
        first_obs = build_state(self.factors, self.macro, self.ptr, self.lookback)
        return first_obs, {}

    def _compute_reward(self, port_ret: float) -> float:
        """Compute the reward for `port_ret` and update the running moments.

        The result is sanitized (NaN -> 0, infinities -> +/-1e3) and bounded
        to [-1e3, 1e3].

        Raises:
            ValueError: If `reward_mode` is not one of the supported modes.
        """
        mode = self.reward_mode
        if mode == 'log_return':
            raw = compute_log_return_reward(port_ret)
        elif mode == 'diff_sharpe':
            raw, self.a_moment, self.b_moment = compute_diff_sharpe_reward(
                port_ret,
                self.a_moment,
                self.b_moment,
                self.reward_eta,
                self.reward_eps,
            )
        elif mode == 'diff_ddr':
            raw, self.a_moment, self.dd2_moment = compute_diff_ddr_reward(
                port_ret,
                self.a_moment,
                self.dd2_moment,
                self.reward_eta,
                self.reward_eps,
            )
        else:
            raise ValueError(f'unknown reward_mode: {self.reward_mode}')

        cleaned = float(np.nan_to_num(raw, nan=0.0, posinf=1e3, neginf=-1e3))
        reward = min(max(cleaned, -1e3), 1e3)
        assert np.isfinite(reward), 'non-finite reward'
        return reward

    def step(self, action):
        """Apply `action` at the current index and advance one month.

        Returns:
            (observation, reward, terminated, truncated, info). The terminal
            observation is an all-zero vector; `truncated` is always False.
        """
        weights = softmax_weights(action)
        realised = float(np.dot(weights, self.target_returns[self.ptr]))
        port_ret = max(realised, -0.999999)
        reward = self._compute_reward(port_ret)

        self.weight_history.append(weights)
        self.return_history.append(port_ret)

        decision_date = self.decision_dates[self.ptr]
        info = {
            'date': str(decision_date.date()),
            'next_date': str((decision_date + pd.offsets.MonthEnd(1)).date()),
            'portfolio_return': port_ret,
            'reward_mode': self.reward_mode,
        }

        self.ptr += 1
        terminated = self.ptr > self.end_idx
        truncated = False

        if not terminated:
            obs = build_state(self.factors, self.macro, self.ptr, self.lookback)
        else:
            obs = np.zeros(self.obs_dim, dtype=np.float32)

        return obs, reward, terminated, truncated, info


In [None]:
def train_rl_for_fold(
    algo_name: str,
    reward_mode: str,
    factors: np.ndarray,
    macro: np.ndarray,
    target_returns: np.ndarray,
    decision_dates: pd.DatetimeIndex,
    train_idx: np.ndarray,
    cfg: dict,
):
    """Train one PPO or DDPG agent on the training indices.

    The environment starts `lookback - 1` steps after the first training
    index so that the first state has a full factor window, and ends at the
    last training index.

    Args:
        algo_name: 'PPO' or 'DDPG'.
        reward_mode: Reward mode forwarded to PortfolioEnv.
        factors, macro, target_returns, decision_dates: Arrays shared by all splits.
        train_idx: Sorted integer positions of the training decision dates.
        cfg: Hyper-parameter dictionary (lookback, reward_*, ppo_* / ddpg_* keys).

    Returns:
        The trained stable-baselines3 model.

    Raises:
        ValueError: If `algo_name` is not supported.
    """
    start_idx = int(train_idx[0] + cfg['lookback'] - 1)
    end_idx = int(train_idx[-1])
    assert start_idx <= end_idx, 'invalid train range for RL'

    env_kwargs = dict(
        factors=factors,
        macro=macro,
        target_returns=target_returns,
        decision_dates=decision_dates,
        start_idx=start_idx,
        end_idx=end_idx,
        lookback=cfg['lookback'],
        reward_mode=reward_mode,
        reward_eta=cfg['reward_eta'],
        reward_eps=cfg['reward_eps'],
    )
    vec_env = DummyVecEnv([lambda: PortfolioEnv(**env_kwargs)])

    if algo_name not in ('PPO', 'DDPG'):
        raise ValueError(f'unsupported algo: {algo_name}')

    if algo_name == 'PPO':
        agent = PPO(
            policy='MlpPolicy',
            env=vec_env,
            learning_rate=cfg['ppo_learning_rate'],
            gamma=cfg['ppo_gamma'],
            n_steps=cfg['ppo_n_steps'],
            batch_size=cfg['ppo_batch_size'],
            ent_coef=cfg['ppo_ent_coef'],
            seed=SEED,
            verbose=0,
            device='cpu',
        )
        agent.learn(total_timesteps=cfg['ppo_total_timesteps'])
        return agent

    # DDPG: Gaussian exploration noise with the same sigma on every action dimension.
    n_actions = target_returns.shape[1]
    exploration_noise = NormalActionNoise(
        mean=np.zeros(n_actions, dtype=np.float32),
        sigma=cfg['ddpg_noise_sigma'] * np.ones(n_actions, dtype=np.float32),
    )
    agent = DDPG(
        policy='MlpPolicy',
        env=vec_env,
        learning_rate=cfg['ddpg_learning_rate'],
        buffer_size=cfg['ddpg_buffer_size'],
        learning_starts=cfg['ddpg_learning_starts'],
        batch_size=cfg['ddpg_batch_size'],
        tau=cfg['ddpg_tau'],
        gamma=cfg['ddpg_gamma'],
        train_freq=cfg['ddpg_train_freq'],
        gradient_steps=cfg['ddpg_gradient_steps'],
        action_noise=exploration_noise,
        seed=SEED,
        verbose=0,
        device='cpu',
    )
    agent.learn(total_timesteps=cfg['ddpg_total_timesteps'])
    return agent


def run_policy(
    model,
    method_name: str,
    algo_name: str,
    reward_mode: str,
    factors: np.ndarray,
    macro: np.ndarray,
    target_returns: np.ndarray,
    decision_dates: pd.DatetimeIndex,
    test_idx: np.ndarray,
    split_name: str,
    lookback: int,
):
    """Evaluate a trained policy deterministically on the test indices.

    For every test index the state is rebuilt, the deterministic action is
    converted to weights with `softmax_weights`, and the realized portfolio
    return (floored at -0.999999) is recorded.

    Returns:
        Tuple of (per-date result DataFrame, stacked weight matrix).
    """
    records = []
    weight_rows = []
    for pos in map(int, test_idx):
        state = build_state(factors, macro, pos, lookback)
        action, _ = model.predict(state, deterministic=True)
        w = softmax_weights(action)
        realised = max(float(np.dot(w, target_returns[pos])), -0.999999)
        decision_date = decision_dates[pos]
        records.append(
            {
                'date': decision_date,
                'next_date': decision_date + pd.offsets.MonthEnd(1),
                'split': str(split_name),
                'method': method_name,
                'algo': algo_name,
                'reward_mode': reward_mode,
                'portfolio_return': realised,
            }
        )
        weight_rows.append(w)
    return pd.DataFrame(records), np.vstack(weight_rows)


def run_equal_weight(
    target_returns: np.ndarray,
    decision_dates: pd.DatetimeIndex,
    test_idx: np.ndarray,
    split_name: str,
):
    """Equal-weight baseline: hold 1/N in every asset at each test date.

    Returns:
        Tuple of (per-date result DataFrame, weight matrix with one identical
        row per test index).
    """
    n_assets = target_returns.shape[1]
    w = np.ones(n_assets, dtype=np.float64) / n_assets

    def _record(pos: int) -> dict:
        realised = max(float(np.dot(w, target_returns[pos])), -0.999999)
        return {
            'date': decision_dates[pos],
            'next_date': decision_dates[pos] + pd.offsets.MonthEnd(1),
            'split': str(split_name),
            'method': 'EqualWeight',
            'algo': 'BASELINE',
            'reward_mode': 'na',
            'portfolio_return': realised,
        }

    records = [_record(int(pos)) for pos in test_idx]
    weight_matrix = np.repeat(w[None, :], len(test_idx), axis=0)
    return pd.DataFrame(records), weight_matrix


def solve_markowitz(mu: np.ndarray, cov: np.ndarray) -> np.ndarray:
    """Long-only maximum-Sharpe weights via SLSQP, starting from equal weights.

    Weights are bounded to [0, 1] and constrained to sum to one. If the
    optimizer reports failure, or the clipped solution sums to zero or less,
    equal weights are returned instead.
    """
    n_assets = len(mu)
    equal = np.full(n_assets, 1.0 / n_assets, dtype=np.float64)

    def objective(w):
        expected = float(np.dot(w, mu))
        # Floor the variance so the square root and the division stay defined.
        risk = float(np.sqrt(max(np.dot(w, cov @ w), 1e-12)))
        return -(expected / risk)

    budget = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1.0}
    res = minimize(
        objective,
        x0=equal,
        method='SLSQP',
        bounds=[(0.0, 1.0)] * n_assets,
        constraints=(budget,),
    )
    if not res.success:
        return equal

    # Remove tiny bound violations left by the solver, then renormalize.
    clipped = np.clip(res.x, 0.0, 1.0)
    total = clipped.sum()
    if total <= 0:
        return equal
    return clipped / total


def run_markowitz_historical(
    returns_hist: np.ndarray,
    target_returns: np.ndarray,
    decision_dates: pd.DatetimeIndex,
    test_idx: np.ndarray,
    split_name: str,
    window: int,
):
    """Rolling historical Markowitz baseline.

    At each test index the mean and covariance are estimated from the
    trailing `window` rows of `returns_hist` (ending at that index) and
    passed to `solve_markowitz`. With fewer than two rows of history, equal
    weights are used.

    Returns:
        Tuple of (per-date result DataFrame, stacked weight matrix).
    """
    n_assets = returns_hist.shape[1]
    equal = np.ones(n_assets, dtype=np.float64) / n_assets

    def _weights_at(pos: int) -> np.ndarray:
        first = max(0, pos - window + 1)
        sample = returns_hist[first : pos + 1]
        if sample.shape[0] < 2:
            return equal.copy()
        mu = np.nanmean(sample, axis=0)
        cov = np.nan_to_num(np.cov(sample, rowvar=False), nan=0.0, posinf=0.0, neginf=0.0)
        # Small ridge on the diagonal keeps the covariance well conditioned.
        cov = cov + np.eye(cov.shape[0]) * 1e-6
        return solve_markowitz(mu, cov)

    records = []
    weight_rows = []
    for pos in map(int, test_idx):
        w = _weights_at(pos)
        realised = max(float(np.dot(w, target_returns[pos])), -0.999999)
        records.append(
            {
                'date': decision_dates[pos],
                'next_date': decision_dates[pos] + pd.offsets.MonthEnd(1),
                'split': str(split_name),
                'method': 'MarkowitzHist',
                'algo': 'BASELINE',
                'reward_mode': 'na',
                'portfolio_return': realised,
            }
        )
        weight_rows.append(w)

    return pd.DataFrame(records), np.vstack(weight_rows)


def compute_metrics(ret: np.ndarray) -> dict:
    """Summarize a series of monthly returns.

    Metrics:
        CompoundReturn: Product of (1 + r) minus one.
        SharpeRatio: sqrt(12) * mean / sample std (0.0 when the std is numerically zero).
        SterlingRatio: Mean monthly return divided by the maximum drawdown
            (0.0 when there is no drawdown).

    The drawdown is measured on a wealth path that starts at 1.0, so losses
    in the first month(s) count against the initial capital.

    Args:
        ret: Array-like of periodic simple returns; may be empty.

    Returns:
        Dict with the three metrics as Python floats (all 0.0 for empty input).
    """
    ret = np.asarray(ret, dtype=np.float64).reshape(-1)
    if ret.size == 0:
        return {'CompoundReturn': 0.0, 'SharpeRatio': 0.0, 'SterlingRatio': 0.0}

    compound = float(np.prod(1.0 + ret))- 1.0
    mean_m = float(np.mean(ret))
    std_m = float(np.std(ret, ddof=1)) if ret.size > 1 else 0.0
    # Treat rounding-noise dispersion as zero instead of reporting an astronomically large Sharpe.
    sharpe = 0.0 if std_m <= 1e-12 else float(np.sqrt(12.0) * mean_m / std_m)

    # Prepend the starting wealth so the running maximum includes the initial capital.
    wealth = np.concatenate(([1.0], np.cumprod(1.0 + ret)))
    run_max = np.maximum.accumulate(wealth)
    drawdown = wealth / np.maximum(run_max, 1e-12) - 1.0
    max_dd = float(abs(np.min(drawdown)))
    sterling = 0.0 if max_dd == 0.0 else float(mean_m / max_dd)

    return {
        'CompoundReturn': float(compound),
        'SharpeRatio': sharpe,
        'SterlingRatio': sterling,
    }


def assert_long_only(weights: np.ndarray, atol: float = 1e-8):
    """Assert that every row of `weights` is non-negative and sums to one.

    Args:
        weights: Matrix with one portfolio per row.
        atol: Absolute tolerance for both checks.

    Raises:
        AssertionError: If any weight is below -atol or a row does not sum to 1.
    """
    non_negative = np.all(weights >= -atol)
    assert non_negative, 'weights have negative values'
    row_totals = weights.sum(axis=1)
    fully_invested = np.allclose(row_totals, 1.0, atol=atol)
    assert fully_invested, 'weights do not sum to 1'


class PPOActionWrapper(nn.Module):
    """Expose a PPO policy's action head as a plain `state -> action` module.

    The forward pass chains the policy's feature extractor, the policy branch
    of its MLP extractor and its action network.
    """

    def __init__(self, policy):
        super().__init__()
        self.policy = policy

    def forward(self, x):
        """Return the action-network output for a single state or a batch of states."""
        # A single state is promoted to a batch of one.
        batch = x.unsqueeze(0) if x.ndim == 1 else x
        batch = batch.float()
        latent_pi, _latent_vf = self.policy.mlp_extractor(self.policy.extract_features(batch))
        return self.policy.action_net(latent_pi)


class DDPGActionWrapper(nn.Module):
    """Expose a DDPG actor as a `state -> environment action` module.

    When the actor reports `squash_output` and carries an `action_space`
    with `low`/`high` bounds, its output (assumed to lie in [-1, 1]) is
    rescaled linearly to those bounds, so the result is on the same scale as
    the actions the environment and the evaluation loop receive. Actors
    without that information are passed through unchanged.
    """

    def __init__(self, actor):
        super().__init__()
        self.actor = actor

        space = getattr(actor, 'action_space', None)
        squashed = bool(getattr(actor, 'squash_output', False))
        has_bounds = space is not None and hasattr(space, 'low') and hasattr(space, 'high')
        self._rescale = squashed and has_bounds
        if self._rescale:
            # Buffers follow the module across devices and are not trained.
            self.register_buffer('_low', torch.as_tensor(space.low, dtype=torch.float32))
            self.register_buffer('_high', torch.as_tensor(space.high, dtype=torch.float32))

    def forward(self, x):
        """Return actions for a single state or a batch of states."""
        if x.ndim == 1:
            x = x.unsqueeze(0)
        out = self.actor(x.float())
        if self._rescale:
            # Map [-1, 1] -> [low, high]; without this the softmax over the wrapper's
            # output would be much flatter than the policy that was evaluated.
            out = self._low + 0.5 * (out + 1.0) * (self._high - self._low)
        return out


def build_action_module(model, algo_name: str):
    """Wrap a trained model's action head in an nn.Module set to eval mode.

    Args:
        model: Trained model; `model.policy` is used for PPO, `model.actor` for DDPG.
        algo_name: 'PPO' or 'DDPG'.

    Raises:
        ValueError: For any other `algo_name`.
    """
    if algo_name not in ('PPO', 'DDPG'):
        raise ValueError(f'unknown algo_name: {algo_name}')

    wrapped = PPOActionWrapper(model.policy) if algo_name == 'PPO' else DDPGActionWrapper(model.actor)
    wrapped.eval()
    return wrapped


def collect_states(factors: np.ndarray, macro: np.ndarray, idx_array: np.ndarray, lookback: int) -> np.ndarray:
    """Stack the states of all indices that have a full lookback window.

    Indices smaller than `lookback - 1` are skipped. If nothing remains, an
    empty float32 array with the correct number of state columns is returned.
    """
    earliest = lookback - 1
    rows = [
        build_state(factors, macro, int(i), lookback)
        for i in idx_array
        if int(i) >= earliest
    ]
    if not rows:
        width = lookback * factors.shape[1] + macro.shape[1]
        return np.zeros((0, width), dtype=np.float32)
    return np.vstack(rows)


def _match_shap_layout(arr: np.ndarray, n_samples: int, n_features: int):
    """Return `arr` as (samples, features, outputs) if its layout is recognized, else None."""
    if arr.ndim == 2:
        if arr.shape == (n_samples, n_features):
            return arr[:, :, None]
        if arr.shape == (n_features, n_samples):
            return arr.T[:, :, None]
    if arr.ndim == 3:
        if arr.shape[0] == n_samples and arr.shape[1] == n_features:
            return arr
        if arr.shape[1] == n_samples and arr.shape[2] == n_features:
            return np.transpose(arr, (1, 2, 0))
        if arr.shape[0] == n_features and arr.shape[1] == n_samples:
            return np.transpose(arr, (1, 0, 2))
    return None


def standardize_shap_values(shap_values, n_samples: int, n_features: int) -> np.ndarray:
    """Convert SHAP output into an array of shape (n_samples, n_features, n_outputs).

    Accepts either a list with one (n_samples, n_features) matrix per output
    or a single array in one of several axis orders. The array is first
    matched as given and only then after squeezing singleton axes, so a
    single explained sample or a single feature is not squeezed away.

    Raises:
        ValueError: If the array layout cannot be matched.
    """
    if isinstance(shap_values, list):
        mats = []
        for v in shap_values:
            arr = np.squeeze(np.asarray(v))
            if arr.ndim == 1:
                arr = arr.reshape(1, -1)
            if arr.shape != (n_samples, n_features):
                arr = arr.reshape(n_samples, n_features)
            mats.append(arr)
        return np.stack(mats, axis=-1)

    raw = np.asarray(shap_values)
    squeezed = np.squeeze(raw)
    candidates = [raw] if squeezed.shape == raw.shape else [raw, squeezed]
    for candidate in candidates:
        matched = _match_shap_layout(candidate, n_samples, n_features)
        if matched is not None:
            return matched
    raise ValueError(f'unexpected shap value shape: {np.shape(shap_values)}')


def make_state_feature_names(lookback: int, num_factors: int):
    """Return names for the state vector: lagged factors, then three macro names.

    Factor names run from the oldest lag (`lag{lookback}`) to the most recent
    (`lag1`), with all factors of one lag grouped together.
    """
    factor_names = [
        f'factor{k + 1}_lag{lookback - step}'
        for step in range(lookback)
        for k in range(num_factors)
    ]
    return factor_names + ['macro_vix_lag1', 'macro_dxy_lag1', 'macro_spread_lag1']


def normalize_importance_series(values: pd.Series) -> pd.Series:
    """Scale importances so they sum to one.

    When the total is not a finite positive number, an equal share is
    assigned to every entry so results stay comparable; an empty series is
    returned unchanged.
    """
    total = float(values.sum())
    if np.isfinite(total) and total > 0.0:
        return values / total

    count = len(values)
    if count == 0:
        return values
    # No usable information: fall back to an equal split.
    equal_share = np.full(count, 1.0 / float(count), dtype=np.float64)
    return pd.Series(equal_share, index=values.index)


def aggregate_factor_contrib_from_shap(feature_importance: np.ndarray, lookback: int, num_factors: int) -> pd.DataFrame:
    """Aggregate per-state-feature SHAP importances into one value per factor.

    The aggregation is the same as for any other importance vector, so this
    delegates to `aggregate_factor_contrib_from_feature_importance` instead
    of keeping a second copy of the logic.

    Args:
        feature_importance: One importance per state feature, laid out as
            `lookback` blocks of `num_factors` values followed by any extra entries.
        lookback: Number of lags in the state.
        num_factors: Number of latent factors per lag.

    Returns:
        DataFrame with columns 'factor', 'importance', 'normalized_importance'.
    """
    return aggregate_factor_contrib_from_feature_importance(feature_importance, lookback, num_factors)


def aggregate_factor_contrib_from_feature_importance(feature_importance: np.ndarray, lookback: int, num_factors: int) -> pd.DataFrame:
    """Sum each factor's importance over all lags and normalize across factors.

    Only the first `lookback * num_factors` entries are used; factor `k`
    occupies positions k, k + num_factors, k + 2 * num_factors, ...

    Returns:
        DataFrame with columns 'factor', 'importance', 'normalized_importance'.
    """
    n_lagged = lookback * num_factors
    records = [
        {
            'factor': f'factor{k+1}',
            'importance': float(np.sum(feature_importance[np.arange(k, n_lagged, num_factors)])),
        }
        for k in range(num_factors)
    ]
    out = pd.DataFrame(records)
    out['normalized_importance'] = normalize_importance_series(out['importance'])
    return out


def compute_policy_portfolio_returns(model, algo_name: str, states: np.ndarray, next_returns: np.ndarray):
    """Evaluate the policy's action head on a batch of states.

    Args:
        model: Trained model, wrapped via `build_action_module`.
        algo_name: 'PPO' or 'DDPG'.
        states: One state per row.
        next_returns: Asset returns realized after each state, one row per state.

    Returns:
        Tuple (mean portfolio return, per-state portfolio returns, weight matrix),
        with returns floored at -0.999999.
    """
    assert states.shape[0] == next_returns.shape[0], 'states and next_returns length mismatch'

    policy_head = build_action_module(model, algo_name)
    with torch.no_grad():
        state_batch = torch.tensor(states, dtype=torch.float32)
        raw_actions = policy_head(state_batch).cpu().numpy()
    if raw_actions.ndim == 1:
        raw_actions = raw_actions.reshape(1, -1)

    weights = np.vstack([softmax_weights(row) for row in raw_actions])
    # Row-wise dot product between weights and realized returns.
    realised = np.einsum('ij,ij->i', weights, next_returns.astype(np.float64))
    realised = np.clip(realised, -0.999999, None)
    mean_return = float(np.mean(realised))
    return mean_return, realised.astype(np.float64), weights.astype(np.float64)


def plot_factor_contrib(factor_df: pd.DataFrame, title: str, out_path: Path):
    """Save a bar chart of normalized factor importance to `out_path`.

    Args:
        factor_df: Needs the columns 'factor' and 'normalized_importance'.
        title: Figure title.
        out_path: Destination image path.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    labels = factor_df['factor']
    heights = factor_df['normalized_importance']
    ax.bar(labels, heights)
    ax.set_title(title)
    ax.set_xlabel('Factor')
    ax.set_ylabel('Normalized Importance')
    ax.grid(True, axis='y', alpha=0.3)
    fig.tight_layout()
    fig.savefig(out_path, dpi=150)
    # Close explicitly so figures do not accumulate across calls.
    plt.close(fig)


In [None]:
# Main experiment: fit the CAE on the training window, train every (algo, reward) agent,
# evaluate all agents and both baselines on the same test dates, and validate the result table.
all_results = []
trained_models = {}
split_name = CONFIG['split_name']

# Boolean masks over decision_index for the single train/test split.
train_mask = (decision_index >= pd.Timestamp(CONFIG['train_start'])) & (decision_index <= pd.Timestamp(CONFIG['train_end']))
test_mask = (decision_index >= pd.Timestamp(CONFIG['test_start'])) & (decision_index <= pd.Timestamp(CONFIG['test_end']))

train_idx = np.where(train_mask)[0]
test_idx = np.where(test_mask)[0]

# The month counts are hard-coded for the configured date ranges (20 years / 5 years).
assert len(train_idx) == 240, f'train months != 240: {len(train_idx)}'
assert len(test_idx) == 60, f'test months != 60: {len(test_idx)}'
assert len(np.intersect1d(train_idx, test_idx)) == 0, 'train/test overlap detected'

train_dates = decision_index[train_idx]
test_dates = decision_index[test_idx]
assert train_dates.is_monotonic_increasing and train_dates.is_unique
assert test_dates.is_monotonic_increasing and test_dates.is_unique
assert train_dates.max() < test_dates.min(), 'train end must be earlier than test start'

# The CAE is fitted on training rows only, then applied to every date to produce factors.
z_train = feature_tensor[train_idx]
r_train = returns_tensor[train_idx]
cae_model, cae_losses = train_cae(z_train, r_train, CONFIG)
factors_all = infer_factors(cae_model, returns_tensor)

# Rewards use the t+1 return, so the last training month is dropped from RL training:
# its reward would be the return of the first test month.
rl_train_idx = train_idx[:-1]
assert rl_train_idx.max() < test_idx.min(), 'RL train should end before test starts'

for algo_name, reward_mode in product(CONFIG['algos'], CONFIG['reward_modes']):
    method_name = f'{algo_name}_{reward_mode}'
    rl_model = train_rl_for_fold(
        algo_name=algo_name,
        reward_mode=reward_mode,
        factors=factors_all,
        macro=macro_state,
        target_returns=target_tensor,
        decision_dates=decision_index,
        train_idx=rl_train_idx,
        cfg=CONFIG,
    )

    rl_df, rl_w = run_policy(
        model=rl_model,
        method_name=method_name,
        algo_name=algo_name,
        reward_mode=reward_mode,
        factors=factors_all,
        macro=macro_state,
        target_returns=target_tensor,
        decision_dates=decision_index,
        test_idx=test_idx,
        split_name=split_name,
        lookback=CONFIG['lookback'],
    )
    assert_long_only(rl_w)
    all_results.append(rl_df)

    # Keep the model and the data it was built on for the later interpretation step.
    trained_models[(algo_name, reward_mode)] = {
        'model': rl_model,
        'factors_all': factors_all.copy(),
        'train_idx': rl_train_idx.copy(),
        'test_idx': test_idx.copy(),
    }

# Baselines evaluated on the same test dates.
eq_df, eq_w = run_equal_weight(
    target_returns=target_tensor,
    decision_dates=decision_index,
    test_idx=test_idx,
    split_name=split_name,
)
mk_df, mk_w = run_markowitz_historical(
    returns_hist=returns_tensor,
    target_returns=target_tensor,
    decision_dates=decision_index,
    test_idx=test_idx,
    split_name=split_name,
    window=CONFIG['markowitz_window'],
)
assert_long_only(eq_w)
assert_long_only(mk_w)

all_results.extend([eq_df, mk_df])

# Long-format table: one row per (date, method).
results_df = pd.concat(all_results, ignore_index=True)
results_df = results_df.sort_values(['date', 'method']).reset_index(drop=True)

assert np.isfinite(results_df['portfolio_return'].to_numpy(dtype=np.float64)).all(), 'non-finite return found'
assert results_df['split'].nunique() == 1 and results_df['split'].iloc[0] == split_name

# Every method must cover every test date exactly.
expected_points = len(test_idx)
counts = results_df.groupby('method')['date'].nunique().to_dict()
for method_name, n_points in counts.items():
    assert n_points == expected_points, f'{method_name} points mismatch: {n_points}'

# Check that all 6 conditions (algo x reward) are present.
rl_grid = results_df[results_df['algo'] != 'BASELINE'].groupby(['algo', 'reward_mode'])['date'].nunique().reset_index(name='n_dates')
assert len(rl_grid) == 6, 'RL condition grid is incomplete'
assert (rl_grid['n_dates'] == expected_points).all(), 'some RL conditions miss dates'

# A zero in the date x method count table would mean a method is missing on some date.
method_presence = results_df.pivot_table(index='date', columns='method', values='portfolio_return', aggfunc='size', fill_value=0)
assert (method_presence > 0).all().all(), 'methods are not aligned on identical test dates'

print('split_name:', split_name)
print('train months:', len(train_idx), train_dates.min().date(), train_dates.max().date())
print('test months:', len(test_idx), test_dates.min().date(), test_dates.max().date())
print('results rows:', len(results_df))
print('methods:', sorted(results_df['method'].unique()))
print(f'CAE loss(last)={cae_losses[-1]:.6f}')


In [None]:
# Metrics over the whole test period, one row per method.
metrics_records = []
for (method_name, algo_name, reward_mode), g in results_df.groupby(['method', 'algo', 'reward_mode']):
    g = g.sort_values('date')
    m = compute_metrics(g['portfolio_return'].to_numpy(dtype=np.float64))
    m.update({'method': method_name, 'algo': algo_name, 'reward_mode': reward_mode})
    metrics_records.append(m)

metrics_df = pd.DataFrame(metrics_records)[
    ['method', 'algo', 'reward_mode', 'CompoundReturn', 'SharpeRatio', 'SterlingRatio']
].sort_values(['algo', 'reward_mode', 'method']).reset_index(drop=True)

# The same metrics broken down by calendar year of the decision date.
split_records = []
for (year, method_name, algo_name, reward_mode), g in results_df.assign(year=results_df['date'].dt.year).groupby(
    ['year', 'method', 'algo', 'reward_mode']
):
    m = compute_metrics(g.sort_values('date')['portfolio_return'].to_numpy(dtype=np.float64))
    m.update({'year': int(year), 'method': method_name, 'algo': algo_name, 'reward_mode': reward_mode})
    split_records.append(m)

split_grid_df = pd.DataFrame(split_records)[
    ['year', 'method', 'algo', 'reward_mode', 'CompoundReturn', 'SharpeRatio', 'SterlingRatio']
].sort_values(['year', 'algo', 'reward_mode', 'method']).reset_index(drop=True)

# Wide table (date x method) of returns, compounded into cumulative returns.
wide_returns = (
    results_df.pivot(index='date', columns='method', values='portfolio_return')
    .sort_index()
    .astype(float)
)
cumulative = (1.0 + wide_returns).cumprod() - 1.0

metrics_path = OUTPUT_DIR / 'phase_b_v3_metrics.csv'
cum_csv_path = OUTPUT_DIR / 'phase_b_v3_cumulative_returns.csv'
cum_png_path = OUTPUT_DIR / 'phase_b_v3_cumulative_returns.png'
split_grid_path = OUTPUT_DIR / 'phase_b_v3_split_grid.csv'

metrics_df.to_csv(metrics_path, index=False)
cumulative.reset_index().to_csv(cum_csv_path, index=False)
split_grid_df.to_csv(split_grid_path, index=False)

# One cumulative-return line per method.
fig, ax = plt.subplots(figsize=(13, 7))
for col in cumulative.columns:
    ax.plot(cumulative.index, cumulative[col], label=col)
ax.set_title('Phase B v3 Cumulative Returns (single split: 2001-2020 train / 2021-2025 test)')
ax.set_xlabel('Date')
ax.set_ylabel('Cumulative Return')
ax.grid(True, alpha=0.3)
ax.legend(loc='upper left', ncol=2, fontsize=8)
fig.tight_layout()
fig.savefig(cum_png_path, dpi=150)
plt.close(fig)

# Pick the RL method with the highest Sharpe ratio (ties broken by compound return).
# NOTE(review): results_df only holds test-period returns, so the "best" method is
# selected on the test set; any later analysis of it carries that selection bias.
rl_metrics = metrics_df[metrics_df['algo'].isin(['PPO', 'DDPG'])].copy()
best_row = rl_metrics.sort_values(['SharpeRatio', 'CompoundReturn'], ascending=False).iloc[0]
best_method = str(best_row['method'])
best_algo = str(best_row['algo'])
best_reward = str(best_row['reward_mode'])

print('best_method:', best_method)
print('best_algo:', best_algo, 'best_reward:', best_reward)
print('saved:', metrics_path)
print('saved:', cum_csv_path)
print('saved:', cum_png_path)
print('saved:', split_grid_path)


In [None]:
def compute_shap_for_best_model(best_artifact: dict, algo_name: str, cfg: dict):
    """Compute SHAP-based feature and factor importance for a trained policy.

    Background states are sampled from the training split and explained states
    from the test split. ``shap.GradientExplainer`` is tried first; if it
    raises, the function falls back to ``shap.KernelExplainer`` on at most 20
    background and 20 explained samples. If the resulting importance contains
    non-finite values or sums to zero, absolute input gradients (saliency) are
    used instead. The path actually taken is recorded in ``shap_method``.

    Relies on the module-level ``macro_state``, ``target_tensor`` and ``SEED``
    and on the helpers ``collect_states``, ``build_action_module``,
    ``standardize_shap_values``, ``make_state_feature_names``,
    ``normalize_importance_series`` and
    ``aggregate_factor_contrib_from_feature_importance``.

    Args:
        best_artifact: Dict with keys 'model', 'factors_all', 'train_idx' and
            'test_idx'.
        algo_name: Algorithm name forwarded to ``build_action_module``.
        cfg: Config dict; reads 'lookback', 'num_factors', 'shap_background',
            'shap_explain' and 'shap_kernel_nsamples'.

    Returns:
        Dict with 'feature_df', 'factor_df', 'feature_importance',
        'shap_method', 'n_background' and 'n_explain'. The two counts describe
        the samples actually used by the explainer that produced the result.

    Raises:
        AssertionError: If either state set is empty or the number of test
            states differs from the number of test returns.
    """
    model = best_artifact['model']
    factors_all = best_artifact['factors_all']
    train_idx = best_artifact['train_idx']
    test_idx = best_artifact['test_idx']

    train_states = collect_states(factors_all, macro_state, train_idx, cfg['lookback'])
    test_states = collect_states(factors_all, macro_state, test_idx, cfg['lookback'])
    test_returns = target_tensor[test_idx].astype(np.float32)
    assert len(train_states) > 0 and len(test_states) > 0, 'insufficient states for SHAP'
    assert test_states.shape[0] == test_returns.shape[0], 'SHAP test states/returns mismatch'

    # Seeded subsampling keeps the background/explain sets reproducible.
    rng = np.random.default_rng(SEED)
    bg_n = min(cfg['shap_background'], len(train_states))
    ex_n = min(cfg['shap_explain'], len(test_states))
    bg_idx = rng.choice(len(train_states), size=bg_n, replace=False)
    ex_idx = rng.choice(len(test_states), size=ex_n, replace=False)
    bg_np = train_states[bg_idx].astype(np.float32)
    ex_np = test_states[ex_idx].astype(np.float32)

    action_module = build_action_module(model, algo_name)

    shap_method = 'GradientExplainer'
    try:
        bg_t = torch.tensor(bg_np, dtype=torch.float32)
        ex_t = torch.tensor(ex_np, dtype=torch.float32)
        explainer = shap.GradientExplainer(action_module, bg_t)
        shap_values = explainer.shap_values(ex_t)
        shap_arr = standardize_shap_values(shap_values, n_samples=ex_np.shape[0], n_features=ex_np.shape[1])
    except Exception as e:
        # Deliberate best-effort: any GradientExplainer failure switches to the
        # model-agnostic KernelExplainer; the exception type is kept in the
        # method label so the fallback is visible in the report.
        shap_method = f'KernelExplainerFallback:{type(e).__name__}'
        bg_small = bg_np[: min(20, len(bg_np))]
        ex_small = ex_np[: min(20, len(ex_np))]

        def predict_fn(x: np.ndarray) -> np.ndarray:
            x_t = torch.tensor(x, dtype=torch.float32)
            with torch.no_grad():
                out = action_module(x_t).cpu().numpy()
            return out

        explainer = shap.KernelExplainer(predict_fn, bg_small)
        shap_values = explainer.shap_values(ex_small, nsamples=cfg['shap_kernel_nsamples'])
        shap_arr = standardize_shap_values(shap_values, n_samples=ex_small.shape[0], n_features=ex_small.shape[1])
        # Track the subsets actually used so the saliency fallback and the
        # reported n_background / n_explain stay consistent with the result.
        bg_np = bg_small
        ex_np = ex_small

    abs_sum_actions = np.abs(shap_arr).sum(axis=-1)  # [samples, features]
    raw_importance = abs_sum_actions.mean(axis=0)
    # Finiteness must be checked BEFORE sanitizing: nan_to_num always yields
    # finite values, which would otherwise hide a broken SHAP result.
    shap_is_finite = bool(np.isfinite(raw_importance).all())
    feature_importance = np.nan_to_num(raw_importance, nan=0.0, posinf=0.0, neginf=0.0)

    # When SHAP is all-zero or non-finite, use gradient saliency as a substitute.
    if (not shap_is_finite) or float(np.sum(feature_importance)) <= 0.0:
        x_t = torch.tensor(ex_np, dtype=torch.float32, requires_grad=True)
        action_module.zero_grad(set_to_none=True)
        out = action_module(x_t)
        objective = out.abs().sum()
        objective.backward()
        grad = x_t.grad.detach().cpu().numpy()
        feature_importance = np.nan_to_num(np.abs(grad).mean(axis=0), nan=0.0, posinf=0.0, neginf=0.0)
        shap_method = shap_method + '+GradientSaliencyFallback'

    feature_names = make_state_feature_names(cfg['lookback'], cfg['num_factors'])
    feat_df = pd.DataFrame(
        {
            'feature': feature_names,
            'importance': feature_importance,
        }
    )
    feat_df['normalized_importance'] = normalize_importance_series(feat_df['importance'])
    feat_df = feat_df.sort_values('importance', ascending=False).reset_index(drop=True)

    factor_df = aggregate_factor_contrib_from_feature_importance(
        feature_importance=feature_importance,
        lookback=cfg['lookback'],
        num_factors=cfg['num_factors'],
    )
    factor_df = factor_df.sort_values('importance', ascending=False).reset_index(drop=True)

    return {
        'feature_df': feat_df,
        'factor_df': factor_df,
        'feature_importance': feature_importance,
        'shap_method': shap_method,
        'n_background': int(bg_np.shape[0]),
        'n_explain': int(ex_np.shape[0]),
    }


def compute_permutation_importance_for_best_model(best_artifact: dict, algo_name: str, cfg: dict):
    """Score each state feature by the mean-return drop caused by shuffling it.

    For every feature column the test states are copied, that column is
    permuted across samples, and the policy's mean portfolio return is
    re-evaluated. The drop relative to the unpermuted baseline is averaged over
    ``cfg['perm_repeats']`` shuffles; negative averages are clipped to zero to
    form the importance.

    Relies on the module-level ``macro_state`` and ``target_tensor`` and on the
    helpers ``collect_states``, ``compute_policy_portfolio_returns``,
    ``make_state_feature_names``, ``normalize_importance_series`` and
    ``aggregate_factor_contrib_from_feature_importance``.

    Args:
        best_artifact: Dict with keys 'model', 'factors_all' and 'test_idx'.
        algo_name: Algorithm name forwarded to the return-evaluation helper.
        cfg: Config dict; reads 'lookback', 'num_factors', 'perm_seed' and
            'perm_repeats'.

    Returns:
        Dict with 'feature_df', 'factor_df', 'baseline_mean_return',
        'perm_repeats' and 'n_samples'.

    Raises:
        AssertionError: If the number of test states differs from the number
            of test returns.
    """
    policy = best_artifact['model']
    latent_factors = best_artifact['factors_all']
    eval_idx = best_artifact['test_idx']

    test_states = collect_states(latent_factors, macro_state, eval_idx, cfg['lookback'])
    test_returns = target_tensor[eval_idx].astype(np.float32)
    assert test_states.shape[0] == test_returns.shape[0], 'Permutation test states/returns mismatch'

    def mean_return_of(states):
        # Only the first element of the helper's 3-tuple is used here.
        mean_ret, _, _ = compute_policy_portfolio_returns(
            model=policy,
            algo_name=algo_name,
            states=states,
            next_returns=test_returns,
        )
        return mean_ret

    baseline_mean_return = mean_return_of(test_states)

    rng = np.random.default_rng(cfg['perm_seed'])
    n_samples, n_features = test_states.shape
    feature_names = make_state_feature_names(cfg['lookback'], cfg['num_factors'])

    def shuffled_drop(col):
        # Fresh copy per shuffle so every permutation starts from the
        # unmodified test states.
        shuffled = test_states.copy()
        order = rng.permutation(n_samples)
        shuffled[:, col] = shuffled[order, col]
        delta = float(baseline_mean_return - mean_return_of(shuffled))
        return delta if np.isfinite(delta) else 0.0

    records = []
    for col in range(n_features):
        drops = [shuffled_drop(col) for _ in range(int(cfg['perm_repeats']))]
        avg_drop = float(np.mean(drops))
        # Sample standard deviation needs at least two shuffles.
        spread = float(np.std(drops, ddof=1)) if len(drops) > 1 else 0.0
        records.append(
            {
                'feature': feature_names[col],
                'mean_drop': avg_drop,
                'std_drop': spread,
                'importance': float(max(avg_drop, 0.0)),
            }
        )

    importance = np.array([rec['importance'] for rec in records], dtype=np.float64)

    feat_df = pd.DataFrame(records)
    feat_df['normalized_importance'] = normalize_importance_series(feat_df['importance'])
    feat_df = feat_df.sort_values('importance', ascending=False).reset_index(drop=True)

    factor_df = aggregate_factor_contrib_from_feature_importance(
        feature_importance=importance,
        lookback=cfg['lookback'],
        num_factors=cfg['num_factors'],
    ).sort_values('importance', ascending=False).reset_index(drop=True)

    return {
        'feature_df': feat_df,
        'factor_df': factor_df,
        'baseline_mean_return': float(baseline_mean_return),
        'perm_repeats': int(cfg['perm_repeats']),
        'n_samples': int(n_samples),
    }


def compute_integrated_gradients_for_best_model(best_artifact: dict, algo_name: str, cfg: dict):
    """Attribute the policy's realised return to state features via Integrated Gradients.

    For each test state the gradient of ``sum(softmax(action) * next_return)``
    is averaged over ``cfg['ig_steps']`` points on the straight line from the
    baseline to the state (right-endpoint sum), then multiplied by
    ``state - baseline``. Feature importance is the mean absolute attribution
    over all test states.

    Relies on the module-level ``macro_state`` and ``target_tensor`` and on the
    helpers ``collect_states``, ``build_action_module``,
    ``make_state_feature_names``, ``normalize_importance_series`` and
    ``aggregate_factor_contrib_from_feature_importance``.

    Args:
        best_artifact: Dict with keys 'model', 'factors_all', 'train_idx' and
            'test_idx'.
        algo_name: Algorithm name forwarded to ``build_action_module``.
        cfg: Config dict; reads 'lookback', 'num_factors', 'ig_steps' and
            'ig_baseline_mode' ('train_mean' uses the mean training state,
            anything else uses a zero vector).

    Returns:
        Dict with 'feature_df', 'factor_df', 'ig_steps', 'baseline_mode' and
        'n_samples'.

    Raises:
        AssertionError: If either state set is empty or the number of test
            states differs from the number of test returns.
    """
    policy = best_artifact['model']
    latent_factors = best_artifact['factors_all']
    fit_idx = best_artifact['train_idx']
    eval_idx = best_artifact['test_idx']
    lookback = cfg['lookback']

    train_states = collect_states(latent_factors, macro_state, fit_idx, lookback)
    test_states = collect_states(latent_factors, macro_state, eval_idx, lookback)
    test_returns = target_tensor[eval_idx].astype(np.float32)
    assert train_states.shape[0] > 0 and test_states.shape[0] > 0, 'IG requires non-empty states'
    assert test_states.shape[0] == test_returns.shape[0], 'IG test states/returns mismatch'

    use_train_mean = cfg['ig_baseline_mode'] == 'train_mean'
    baseline_vec = (
        train_states.mean(axis=0).astype(np.float32)
        if use_train_mean
        else np.zeros(test_states.shape[1], dtype=np.float32)
    )

    steps = int(cfg['ig_steps'])
    action_module = build_action_module(policy, algo_name)
    baseline_t = torch.tensor(baseline_vec, dtype=torch.float32)
    # Interpolation coefficients 1/steps, 2/steps, ..., 1 (shared by all samples).
    alphas = [float(a) for a in np.linspace(1.0 / steps, 1.0, steps)]

    def attribution_for(state_vec, ret_vec):
        x_t = torch.tensor(state_vec, dtype=torch.float32)
        r_t = torch.tensor(ret_vec, dtype=torch.float32)
        delta = x_t - baseline_t
        grad_sum = torch.zeros_like(x_t)

        for alpha in alphas:
            # Detached leaf tensor so the gradient is taken w.r.t. this point only.
            point = (baseline_t + alpha * delta).detach().clone().requires_grad_(True)
            weights = torch.softmax(action_module(point.unsqueeze(0))[0], dim=-1)
            objective = (weights * r_t).sum()
            (grad,) = torch.autograd.grad(objective, point, retain_graph=False, create_graph=False)
            grad_sum += torch.nan_to_num(grad, nan=0.0, posinf=0.0, neginf=0.0)

        ig = delta * (grad_sum / float(steps))
        return np.abs(ig.detach().cpu().numpy())

    attr_arr = np.vstack([attribution_for(s, r) for s, r in zip(test_states, test_returns)])
    feature_importance = np.nan_to_num(attr_arr.mean(axis=0), nan=0.0, posinf=0.0, neginf=0.0)

    feat_df = pd.DataFrame(
        {
            'feature': make_state_feature_names(lookback, cfg['num_factors']),
            'importance': feature_importance,
        }
    )
    feat_df['normalized_importance'] = normalize_importance_series(feat_df['importance'])
    feat_df = feat_df.sort_values('importance', ascending=False).reset_index(drop=True)

    factor_df = aggregate_factor_contrib_from_feature_importance(
        feature_importance=feature_importance,
        lookback=lookback,
        num_factors=cfg['num_factors'],
    ).sort_values('importance', ascending=False).reset_index(drop=True)

    return {
        'feature_df': feat_df,
        'factor_df': factor_df,
        'ig_steps': steps,
        'baseline_mode': cfg['ig_baseline_mode'],
        'n_samples': int(test_states.shape[0]),
    }


# Look up the trained artifact for the best (algo, reward_mode) condition.
best_key = (best_algo, best_reward)
assert best_key in trained_models, f'best model not found in trained model store: {best_key}'
best_artifact = trained_models[best_key]

# Run the three interpretability methods on the same best model.
shap_out = compute_shap_for_best_model(best_artifact, best_algo, CONFIG)
perm_out = compute_permutation_importance_for_best_model(best_artifact, best_algo, CONFIG)
ig_out = compute_integrated_gradients_for_best_model(best_artifact, best_algo, CONFIG)

# Output locations: per-feature CSV, per-factor CSV and per-factor plot for
# each method, plus the cross-method comparison and the evidence report.
shap_feature_path = OUTPUT_DIR / 'phase_b_v3_shap_feature_importance.csv'
shap_factor_path = OUTPUT_DIR / 'phase_b_v3_shap_factor_contrib.csv'
shap_factor_png_path = OUTPUT_DIR / 'phase_b_v3_shap_factor_contrib.png'

perm_feature_path = OUTPUT_DIR / 'phase_b_v3_perm_feature_importance.csv'
perm_factor_path = OUTPUT_DIR / 'phase_b_v3_perm_factor_contrib.csv'
perm_factor_png_path = OUTPUT_DIR / 'phase_b_v3_perm_factor_contrib.png'

ig_feature_path = OUTPUT_DIR / 'phase_b_v3_ig_feature_importance.csv'
ig_factor_path = OUTPUT_DIR / 'phase_b_v3_ig_factor_contrib.csv'
ig_factor_png_path = OUTPUT_DIR / 'phase_b_v3_ig_factor_contrib.png'

factor_compare_path = OUTPUT_DIR / 'phase_b_v3_interpretability_factor_compare.csv'
evidence_path = OUTPUT_DIR / 'phase_b_v3_experiment_evidence.md'

shap_out['feature_df'].to_csv(shap_feature_path, index=False)
shap_out['factor_df'].to_csv(shap_factor_path, index=False)
perm_out['feature_df'].to_csv(perm_feature_path, index=False)
perm_out['factor_df'].to_csv(perm_factor_path, index=False)
ig_out['feature_df'].to_csv(ig_feature_path, index=False)
ig_out['factor_df'].to_csv(ig_factor_path, index=False)

plot_factor_contrib(shap_out['factor_df'], f'SHAP Factor Contribution ({best_method})', shap_factor_png_path)
plot_factor_contrib(perm_out['factor_df'], f'Permutation Factor Contribution ({best_method})', perm_factor_png_path)
plot_factor_contrib(ig_out['factor_df'], f'IG Factor Contribution ({best_method})', ig_factor_png_path)

# Stack the three factor tables into one long table tagged by method.
# Note: here 'method' means the interpretability method, not the portfolio method.
factor_compare_df = pd.concat(
    [
        shap_out['factor_df'].assign(method='SHAP'),
        perm_out['factor_df'].assign(method='Permutation'),
        ig_out['factor_df'].assign(method='IntegratedGradients'),
    ],
    ignore_index=True,
)
factor_compare_df = factor_compare_df[['method', 'factor', 'importance', 'normalized_importance']]
factor_compare_df.to_csv(factor_compare_path, index=False)

# Test results (mapped to the requirements)
# NOTE(review): the +3 presumably corresponds to the width of macro_state
# appended to the lagged factors - confirm against collect_states.
state_dim_expected = CONFIG['lookback'] * CONFIG['num_factors'] + 3
# All three feature tables must have one row per state dimension, and the
# permutation run must have used one sample per test index.
shape_alignment_pass = bool(
    len(shap_out['feature_df']) == state_dim_expected
    and len(perm_out['feature_df']) == state_dim_expected
    and len(ig_out['feature_df']) == state_dim_expected
    and best_artifact['test_idx'].shape[0] == perm_out['n_samples']
)

# decision_index must be sorted and unique, train/test indices disjoint, and
# the last training date strictly before the first test date.
data_integrity_pass = bool(
    decision_index.is_monotonic_increasing
    and decision_index.is_unique
    and len(np.intersect1d(train_idx, test_idx)) == 0
    and decision_index[train_idx].max() < decision_index[test_idx].min()
)
alignment_pass = bool(train_idx.max() < test_idx.min())
reward_finite_pass = bool(np.isfinite(results_df['portfolio_return'].to_numpy(dtype=np.float64)).all())
# rl_grid, expected_points and method_presence are defined in an earlier cell.
coverage_pass = bool(len(rl_grid) == 6 and (rl_grid['n_dates'] == expected_points).all())
constraint_pass = True  # assert_long_only already ran inside the run loop
comparison_pass = bool((method_presence > 0).all().all())

# Numeric sanity: no NaN/inf in the reported columns, and the normalized
# factor contributions of each method must sum to 1 within 1e-6.
perm_numeric_pass = bool(np.isfinite(perm_out['feature_df'][['mean_drop', 'std_drop', 'importance']].to_numpy()).all())
ig_numeric_pass = bool(np.isfinite(ig_out['feature_df'][['importance', 'normalized_importance']].to_numpy()).all())
perm_sum = float(perm_out['factor_df']['normalized_importance'].sum())
ig_sum = float(ig_out['factor_df']['normalized_importance'].sum())
shap_sum = float(shap_out['factor_df']['normalized_importance'].sum())
perm_sum_pass = bool(abs(perm_sum - 1.0) <= 1e-6)
ig_sum_pass = bool(abs(ig_sum - 1.0) <= 1e-6)
shap_sum_pass = bool(abs(shap_sum - 1.0) <= 1e-6)

# The outputs must echo the configured repeat count / IG settings.
perm_repeat_pass = bool(
    perm_out['perm_repeats'] == CONFIG['perm_repeats']
    and {'mean_drop', 'std_drop'}.issubset(set(perm_out['feature_df'].columns))
)
ig_config_pass = bool(ig_out['ig_steps'] == CONFIG['ig_steps'] and ig_out['baseline_mode'] == CONFIG['ig_baseline_mode'])

compare_methods = set(factor_compare_df['method'].unique().tolist())
compare_pass = compare_methods == {'SHAP', 'Permutation', 'IntegratedGradients'}

# Flags reported in the evidence file: SHAP used the saliency fallback, or a
# method produced no positive importance at all.
shap_zero_fallback = bool('GradientSaliencyFallback' in shap_out['shap_method'])
perm_zero_fallback = bool(float(perm_out['feature_df']['importance'].sum()) <= 0.0)
ig_zero_fallback = bool(float(ig_out['feature_df']['importance'].sum()) <= 0.0)


def df_to_markdown(df: pd.DataFrame, float_digits: int = 6) -> str:
    """Render a DataFrame as a pipe-delimited Markdown table.

    Float cells (Python ``float`` or NumPy floating) are formatted with
    ``float_digits`` decimals; every other cell is rendered with ``str``.

    Rows are read with ``itertuples`` rather than ``iterrows``: ``iterrows``
    builds one Series per row and upcasts mixed numeric columns to float, which
    would print integer columns as e.g. ``1.000000``.

    Args:
        df: Table to render. The index is not included in the output.
        float_digits: Number of decimals used for float cells.

    Returns:
        Header line, separator line and one line per row, each terminated by
        a newline.
    """

    def render(value) -> str:
        if isinstance(value, (float, np.floating)):
            return f'{float(value):.{float_digits}f}'
        return str(value)

    def as_row(cells) -> str:
        return '| ' + ' | '.join(cells) + ' |'

    lines = [
        # str() so non-string column labels (ints, tuples) do not break join.
        as_row(str(c) for c in df.columns),
        as_row(['---'] * len(df.columns)),
    ]
    lines.extend(
        as_row(render(v) for v in record)
        for record in df.itertuples(index=False, name=None)
    )
    return '\n'.join(lines) + '\n'


# Markdown tables embedded in the evidence report.
metrics_md = df_to_markdown(metrics_df, float_digits=6)
shap_top_md = df_to_markdown(shap_out['feature_df'].head(10), float_digits=6)
perm_top_md = df_to_markdown(perm_out['feature_df'].head(10), float_digits=6)
ig_top_md = df_to_markdown(ig_out['feature_df'].head(10), float_digits=6)
factor_compare_md = df_to_markdown(factor_compare_df.sort_values(['method', 'importance'], ascending=[True, False]), float_digits=6)

# Every artifact this notebook is expected to produce. Declared before the
# report so the report's output list and the existence check below share one
# source of truth instead of a second, hard-coded copy of the paths.
required_outputs = [
    metrics_path,
    cum_csv_path,
    cum_png_path,
    split_grid_path,
    shap_feature_path,
    shap_factor_path,
    shap_factor_png_path,
    perm_feature_path,
    perm_factor_path,
    perm_factor_png_path,
    ig_feature_path,
    ig_factor_path,
    ig_factor_png_path,
    factor_compare_path,
    evidence_path,
]
outputs_md = '\n'.join(f'- `{p}`' for p in required_outputs)

# Values derived from the run configuration so the report cannot drift from
# what was actually executed.
notebook_path = PROJECT_ROOT / 'notebooks' / 'cafpo_phase_b_v3_no_fold.ipynb'
train_span = f"{CONFIG['train_start'][:7]}〜{CONFIG['train_end'][:7]}"  # YYYY-MM
test_span = f"{CONFIG['test_start'][:7]}〜{CONFIG['test_end'][:7]}"  # YYYY-MM
# Mirrors compute_integrated_gradients_for_best_model: 'train_mean' uses the
# mean training state, any other mode uses a zero vector.
ig_baseline_desc = 'train状態平均' if ig_out['baseline_mode'] == 'train_mean' else 'ゼロベクトル'


evidence_text = f'''# Phase B v3 Experiment Evidence (No-Fold Single Split)

- 実行日時: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- ノートブック: `{notebook_path}`
- split: Train({train_span}) / Test({test_span})
- split_name: `{CONFIG['split_name']}`

## 1. 実験条件
- 条件行列: PPO/DDPG × [log_return, diff_sharpe, diff_ddr] = 6条件
- 制約: Long-only（`w>=0`, `sum(w)=1`）
- 報酬適応率: η = {CONFIG['reward_eta']}
- DDPG設定: lr={CONFIG['ddpg_learning_rate']}, buffer={CONFIG['ddpg_buffer_size']}, learning_starts={CONFIG['ddpg_learning_starts']}, batch={CONFIG['ddpg_batch_size']}, tau={CONFIG['ddpg_tau']}, gamma={CONFIG['ddpg_gamma']}, train_freq={CONFIG['ddpg_train_freq']}, gradient_steps={CONFIG['ddpg_gradient_steps']}, timesteps={CONFIG['ddpg_total_timesteps']}
- Value-weight: 未実装（時価総額データ不在）

## 2. 総合比較結果
{metrics_md}

## 3. 最良条件
- best_method: **{best_method}**
- best_algo: **{best_algo}**
- best_reward: **{best_reward}**
- 選定規則: Sharpe最大（同点時 Compound最大）

## 4. 解釈性比較（SHAP / Permutation / Integrated Gradients）
- SHAP: `abs(SHAP)` を9アクション方向に和し、特徴重要度を算出
- Permutation: 各特徴をtest期間でシャッフルし、平均月次リターン低下 `drop = baseline_mean_return - permuted_mean_return` を集計
- Integrated Gradients: baseline=`{ig_baseline_desc}`、steps={CONFIG['ig_steps']}、対象スカラー=`softmax(policy(s_t))·r_(t+1)`
- 因子集約: 同一因子kの{CONFIG['lookback']}ラグ合算後、因子間正規化

### 4.1 SHAP Top Features
{shap_top_md}

### 4.2 Permutation Top Features
{perm_top_md}

### 4.3 Integrated Gradients Top Features
{ig_top_md}

### 4.4 Factor Contribution Comparison
{factor_compare_md}

### 4.5 Fallback/設定情報
- SHAP手法: {shap_out['shap_method']}
- SHAP background/explain: {shap_out['n_background']}/{shap_out['n_explain']}
- Permutation repeats: {perm_out['perm_repeats']}, baseline_mean_return={perm_out['baseline_mean_return']:.6f}
- IG baseline_mode/steps: {ig_out['baseline_mode']}/{ig_out['ig_steps']}
- ゼロ寄与フォールバック: SHAP={'YES' if shap_zero_fallback else 'NO'}, Permutation={'YES' if perm_zero_fallback else 'NO'}, IG={'YES' if ig_zero_fallback else 'NO'}

## 5. テスト結果
- データ整合テスト: {'PASS' if data_integrity_pass else 'FAIL'}
- 時点整合テスト（`X_t` と `r_(t+1)`）: {'PASS' if alignment_pass else 'FAIL'}
- 報酬関数テスト（非有限値なし）: {'PASS' if reward_finite_pass else 'FAIL'}
- 条件網羅テスト（6条件 × test60か月）: {'PASS' if coverage_pass else 'FAIL'}
- 制約テスト（Long-only）: {'PASS' if constraint_pass else 'FAIL'}
- 比較整合テスト（同一test dates）: {'PASS' if comparison_pass else 'FAIL'}
- 形状整合テスト（3手法 feature 長一致）: {'PASS' if shape_alignment_pass else 'FAIL'}
- 数値安定性テスト（Permutation/IG 非有限値なし）: {'PASS' if (perm_numeric_pass and ig_numeric_pass) else 'FAIL'}
- Permutation妥当性テスト（repeats/列）: {'PASS' if perm_repeat_pass else 'FAIL'}
- IG妥当性テスト（baseline/steps）: {'PASS' if ig_config_pass else 'FAIL'}
- 因子寄与合計=1テスト: {'PASS' if (shap_sum_pass and perm_sum_pass and ig_sum_pass) else 'FAIL'}
  - SHAP={shap_sum:.6f}, Permutation={perm_sum:.6f}, IG={ig_sum:.6f}
- 比較整合テスト（比較CSV method種別）: {'PASS' if compare_pass else 'FAIL'}

## 6. 生成物
{outputs_md}
'''

evidence_path.write_text(evidence_text, encoding='utf-8')

# Fail loudly if any expected artifact (including the report itself) is missing.
for p in required_outputs:
    assert p.exists(), f'missing output: {p}'

print('SHAP method:', shap_out['shap_method'])
print('Permutation repeats:', perm_out['perm_repeats'])
print('IG baseline/steps:', ig_out['baseline_mode'], ig_out['ig_steps'])
print('factor sums:', shap_sum, perm_sum, ig_sum)
print('saved evidence:', evidence_path)


## まとめ

- foldベース評価を廃止し、単発split（Train: 2001-2020 / Test: 2021-2025）へ切替。
- 比較条件は `PPO/DDPG × 3報酬` の6条件を維持。
- 出力は `phase_b_v3_*` として保存し、`phase_b_v2_*` とは分離。
- 解釈性は `best_method` 1条件に対し、SHAP + Permutation + Integrated Gradients を併用。
- 因子寄与は3手法すべてで12ラグ集約・正規化して比較。
