# 政策銘柄バックテスト分析

political_trending_archive.parquet の結果を可視化・分析

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Japanese font settings: sans-serif candidates listed in priority order.
plt.rcParams.update({
    'font.sans-serif': ['Hiragino Sans', 'Yu Gothic', 'Meiryo', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})

# Input locations: both archives live in the `backtest` sub-directory of DATA_DIR.
DATA_DIR = Path('../data/parquet')
POLITICAL_ARCHIVE_PATH = DATA_DIR.joinpath('backtest', 'political_trending_archive.parquet')
GROK_ARCHIVE_PATH = DATA_DIR.joinpath('backtest', 'grok_trending_archive.parquet')

## 1. データ読み込み

In [None]:
# Load the political-stock backtest archive and parse its two date columns.
political_df = pd.read_parquet(POLITICAL_ARCHIVE_PATH)
for date_col in ('selection_date', 'backtest_date'):
    political_df[date_col] = pd.to_datetime(political_df[date_col])

# Report the size of the archive and the selection-date range it covers.
selection_dates = political_df['selection_date']
summary_lines = [
    "政策銘柄バックテスト結果:",
    f"  総レコード: {len(political_df)}",
    f"  ユニーク銘柄: {political_df['ticker'].nunique()}",
    f"  期間: {selection_dates.min().date()} - {selection_dates.max().date()}",
    f"  総日数: {selection_dates.nunique()}",
]
print("\n".join(summary_lines))

# Last expression of the cell, so a notebook renders the first rows.
political_df.head()

## 2. 全体統計

In [None]:
# Overall Phase2 statistics computed over every record in the archive.
# NOTE(review): assumes `phase2_win` is boolean / 0-1 so that its sum counts wins - confirm.
record_count = len(political_df)
total_profit = political_df['profit_per_100_shares_phase2'].sum()
win_rate = political_df['phase2_win'].sum() / record_count * 100
avg_return = political_df['phase2_return'].mul(100).mean()  # scaled by 100 for display as %

report_lines = [
    "Phase2（大引け）全体統計:",
    f"  累積利益: {total_profit:+,.0f}円",
    f"  勝率: {win_rate:.2f}%",
    f"  平均リターン: {avg_return:+.2f}%",
    f"  銘柄数: {record_count}",
]
print("\n".join(report_lines))

## 3. tags別パフォーマンス

In [None]:
# Expand the `tags` column: rows with an empty tag string are dropped, the
# remaining strings are split on ', ' and exploded so that every
# (record, tag) pair becomes its own row.
tags_expanded = political_df[political_df['tags'] != ''].copy()
tags_expanded['tags_list'] = tags_expanded['tags'].str.split(', ')
tags_expanded = tags_expanded.explode('tags_list')

# Per-tag Phase2 aggregation.
# Named aggregation ties each output column to its source column explicitly
# instead of relying on a positional rename of the flattened columns.
# `avg_return_pct` is scaled by 100 *before* rounding so that it really is a
# percentage (matching the overall and daily statistics); rounding the raw
# fractional return to two decimals would discard almost all of its precision.
tags_stats = (
    tags_expanded.groupby('tags_list')
    .agg(
        count=('ticker', 'count'),
        total_profit=('profit_per_100_shares_phase2', 'sum'),
        avg_profit=('profit_per_100_shares_phase2', 'mean'),
        avg_return_pct=('phase2_return', lambda x: (x * 100).mean()),
        win_rate=('phase2_win', lambda x: (x.sum() / len(x) * 100) if len(x) > 0 else 0),
    )
    .round(2)
    .sort_values('total_profit', ascending=False)
)

print("\ntags別パフォーマンス (Phase2):")
print(tags_stats)
# A record carrying several tags contributes to each of its tags, so this sum
# counts such records once per tag.
print(f"\ntotal_profit の合計: {tags_stats['total_profit'].sum():,.0f}円")

## 4. 可視化: tags別累積利益

In [None]:
# Horizontal bar chart of total Phase2 profit per tag (green = positive, red otherwise).
profit_by_tag = tags_stats['total_profit']
bar_colors = profit_by_tag.gt(0).map({True: 'green', False: 'red'}).tolist()

fig, ax = plt.subplots(figsize=(12, 6))
profit_by_tag.plot.barh(ax=ax, color=bar_colors)

ax.set_title('政策銘柄: tags別累積利益 (Phase2)', fontsize=14, fontweight='bold')
ax.set_xlabel('累積利益 (円)', fontsize=12)
ax.set_ylabel('政策タグ', fontsize=12)
ax.axvline(0, color='black', linewidth=0.8, linestyle='--')
ax.grid(axis='x', alpha=0.3)

# Value labels: placed past the end of each bar, i.e. to the right of
# positive bars and to the left of the others.
for row, profit in enumerate(profit_by_tag):
    if profit > 0:
        align = 'left'
    else:
        align = 'right'
    ax.text(profit, row, f'{profit:+,.0f}', va='center', ha=align, fontsize=10)

plt.tight_layout()
plt.show()

## 5. 可視化: tags別勝率

In [None]:
# Horizontal bar chart of the Phase2 win rate per tag
# (green = at least 50%, red = below 50%).
fig, ax = plt.subplots(figsize=(12, 6))

colors = ['green' if x >= 50 else 'red' for x in tags_stats['win_rate']]
tags_stats['win_rate'].plot(kind='barh', ax=ax, color=colors)

ax.set_xlabel('勝率 (%)', fontsize=12)
ax.set_ylabel('政策タグ', fontsize=12)
ax.set_title('政策銘柄: tags別勝率 (Phase2)', fontsize=14, fontweight='bold')
ax.axvline(50, color='black', linewidth=0.8, linestyle='--', label='50%')
ax.grid(axis='x', alpha=0.3)
ax.legend()

# Value labels. Win-rate bars always start at 0 and extend to the right, so
# the label is always anchored to the right of the bar end. Right-aligning
# labels for rates <= 50 would draw the text on top of the bar and, for a 0%
# rate, to the left of the axis over the tick labels.
for i, v in enumerate(tags_stats['win_rate']):
    ax.text(v, i, f'{v:.1f}%', va='center', ha='left', fontsize=10)

plt.tight_layout()
plt.show()

## 6. 日別パフォーマンス

In [None]:
# Per-day Phase2 aggregation, keyed by selection date.
def _win_rate_pct(wins):
    """Return the share of winning records in the group, in percent."""
    if len(wins) > 0:
        return wins.sum() / len(wins) * 100
    return 0


def _mean_return_pct(returns):
    """Return the mean of the group's returns after scaling by 100."""
    return (returns * 100).mean()


daily_stats = (
    political_df.groupby('selection_date')
    .agg(
        count=('ticker', 'count'),
        total_profit=('profit_per_100_shares_phase2', 'sum'),
        win_rate=('phase2_win', _win_rate_pct),
        avg_return=('phase2_return', _mean_return_pct),
    )
    .round(2)
    .sort_index()
)

print("\n日別パフォーマンス (Phase2):")
print(daily_stats)

In [None]:
# Two stacked charts: per-day total profit (bars) and per-day win rate (line).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 10))
ax1, ax2 = axes

# 1. Per-day profit: green bars for positive days, red otherwise.
daily_profit = daily_stats['total_profit']
colors = daily_profit.gt(0).map({True: 'green', False: 'red'}).tolist()
daily_profit.plot.bar(ax=ax1, color=colors)
ax1.set_title('政策銘柄: 日別累積利益 (Phase2)', fontsize=14, fontweight='bold')
ax1.set_xlabel('日付', fontsize=12)
ax1.set_ylabel('利益 (円)', fontsize=12)
ax1.axhline(0, color='black', linewidth=0.8, linestyle='--')
ax1.grid(axis='y', alpha=0.3)
ax1.tick_params(axis='x', rotation=45)

# 2. Per-day win rate with a dashed 50% reference line.
daily_stats['win_rate'].plot.line(marker='o', ax=ax2, color='blue')
ax2.set_title('政策銘柄: 日別勝率 (Phase2)', fontsize=14, fontweight='bold')
ax2.set_xlabel('日付', fontsize=12)
ax2.set_ylabel('勝率 (%)', fontsize=12)
ax2.axhline(50, color='red', linewidth=0.8, linestyle='--', label='50%')
ax2.grid(alpha=0.3)
ax2.legend()
ax2.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 7. Grokとの比較

In [None]:
def _phase2_metrics(df):
    """Return the formatted Phase2 metrics of *df* in the comparison table's row order.

    Order: total profit, win rate, mean return, record count.
    """
    profit_sum = df['profit_per_100_shares_phase2'].sum()
    win_pct = df['phase2_win'].sum() / len(df) * 100
    mean_return_pct = (df['phase2_return'] * 100).mean()
    return [
        f"{profit_sum:+,.0f}円",
        f"{win_pct:.2f}%",
        f"{mean_return_pct:+.2f}%",
        len(df),
    ]


# Load the Grok backtest archive and parse its selection date.
grok_df = pd.read_parquet(GROK_ARCHIVE_PATH)
grok_df['selection_date'] = pd.to_datetime(grok_df['selection_date'])

grok_dates = grok_df['selection_date']
print("\n".join([
    "Grokバックテスト結果:",
    f"  総レコード: {len(grok_df)}",
    f"  ユニーク銘柄: {grok_df['ticker'].nunique()}",
    f"  期間: {grok_dates.min().date()} - {grok_dates.max().date()}",
]))

# Side-by-side Phase2 comparison of the two archives.
comparison = pd.DataFrame({
    '指標': ['累積利益', '勝率', '平均リターン', '銘柄数'],
    '政策銘柄': _phase2_metrics(political_df),
    'Grok銘柄': _phase2_metrics(grok_df),
})

print("\n政策銘柄 vs Grok銘柄 (Phase2):")
print(comparison.to_string(index=False))

## 8. ベスト・ワースト銘柄

In [None]:
# Columns shown in both ranking tables.
ranking_cols = [
    'selection_date',
    'ticker',
    'company_name',
    'tags',
    'profit_per_100_shares_phase2',
    'phase2_return',
]


def _ranked_by_phase2_profit(selector):
    """Pick 10 records with *selector* (``nlargest`` / ``nsmallest``) and scale the return by 100."""
    picked = selector(10, 'profit_per_100_shares_phase2')[ranking_cols].copy()
    picked['phase2_return'] = (picked['phase2_return'] * 100).round(2)
    return picked


# Ten records with the largest Phase2 profit.
best_10 = _ranked_by_phase2_profit(political_df.nlargest)
print("\nベスト10銘柄 (Phase2):")
print(best_10.to_string(index=False))

# Ten records with the smallest Phase2 profit.
worst_10 = _ranked_by_phase2_profit(political_df.nsmallest)
print("\nワースト10銘柄 (Phase2):")
print(worst_10.to_string(index=False))

## 9. 結論

### 政策銘柄デイトレ戦略の有効性

- **累積利益**: 政策銘柄全体でのパフォーマンス（Phase2の数値は「2. 全体統計」の出力を参照）
- **最適tags**: 「経済安全保障」が最も稼げる（+236,419円）
- **避けるべきtags**: 「防衛・安全保障」が最も損失（-380,700円）

### 次のステップ
1. `localhost:3000/dev/analyze` で可視化
2. tags別の戦略最適化（Phase1/2/3比較）
3. Grokと政策銘柄の組み合わせ戦略