# LME銅先物 隣月間スプレッド 相関・共和分分析

## 概要
このノートブックでは、隣月間スプレッド（M1-M2、M2-M3、M3-M4）の相関関係と共和分関係を詳細に分析します。

### 分析目標
- スプレッド間の相関構造の理解
- 共和分関係（長期均衡関係）の検出
- ペアトレードの機会特定
- リスク分散効果の評価

### 期待される成果
- スプレッド間の相関パターンとその時間変動
- 共和分関係による長期均衡メカニズムの発見
- 統計的裁定機会の特定
- ポートフォリオ構築への示唆

In [None]:
# 必要ライブラリのインポート
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import psycopg2
from sqlalchemy import create_engine
import warnings
from datetime import datetime, timedelta
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from scipy import stats
import os

# 統計・時系列分析
from statsmodels.tsa.stattools import coint, adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.regression.linear_model import OLS
import statsmodels.api as sm
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Suppress every warning category for the rest of the notebook.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below — consider narrowing the filter while developing.
warnings.filterwarnings('ignore')
# Select the matplotlib style sheet and the seaborn colour palette.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

In [None]:
# データベース接続とデータ取得
def get_db_connection(db_url=None):
    """Create a SQLAlchemy engine for the LME copper PostgreSQL database.

    Args:
        db_url: Optional SQLAlchemy connection URL. When omitted, the
            ``LME_COPPER_DB_URL`` environment variable is used if it is set;
            otherwise the notebook's original local default is used.

    Returns:
        The engine object, or ``None`` when ``create_engine`` raised (the
        error is printed rather than propagated).
    """
    if db_url is None:
        db_url = os.environ.get(
            'LME_COPPER_DB_URL',
            'postgresql://Yusuke@localhost:5432/lme_copper_db',
        )
    try:
        # NOTE(review): create_engine normally does not open a connection, so
        # an unreachable server is presumably only reported on first query.
        return create_engine(db_url)
    except Exception as e:
        print(f"データベース接続エラー: {e}")
        return None

def load_and_calculate_spreads():
    """Load M1-M4 closing prices and derive the adjacent-month spreads.

    Returns:
        DataFrame indexed by ``trade_date`` with the columns
        ``M1_M2_spread``, ``M2_M3_spread``, ``M3_M4_spread`` and
        ``M1_price`` .. ``M4_price``. Rows containing any missing value are
        dropped.

    Raises:
        ConnectionError: If ``get_db_connection`` returned ``None``.
    """
    engine = get_db_connection()
    if engine is None:
        # Fail early with a clear message instead of handing None to pandas.
        raise ConnectionError(
            "データベースエンジンを作成できなかったため、スプレッドデータを取得できません"
        )

    query = """
    SELECT 
        trade_date,
        contract_month,
        close_price,
        volume,
        open_interest
    FROM lme_copper_futures 
    WHERE contract_month IN (1, 2, 3, 4)
        AND close_price IS NOT NULL
        AND close_price > 0
    ORDER BY trade_date, contract_month
    """

    df = pd.read_sql(query, engine)
    df['trade_date'] = pd.to_datetime(df['trade_date'])

    # One row per trade date, one column per contract month (M1..M4).
    pivot_df = df.pivot(index='trade_date', columns='contract_month', values='close_price')
    pivot_df.columns = [f'M{int(col)}' for col in pivot_df.columns]

    months = ['M1', 'M2', 'M3', 'M4']
    spreads_df = pd.DataFrame(index=pivot_df.index)

    # Adjacent-month spread = nearer contract minus the next one out.
    for near, far in zip(months, months[1:]):
        spreads_df[f'{near}_{far}_spread'] = pivot_df[near] - pivot_df[far]

    # Keep the outright prices alongside the spreads.
    for month in months:
        spreads_df[f'{month}_price'] = pivot_df[month]

    return spreads_df.dropna()

# Load the spread data and report the row count and the covered date range.
spreads_data = load_and_calculate_spreads()
print(f"✅ データ取得完了: {len(spreads_data):,} レコード")
print(f"📅 分析期間: {spreads_data.index.min()} ～ {spreads_data.index.max()}")

## 1. 基本相関分析

In [None]:
def analyze_basic_correlations(df):
    """Print and return the correlation matrices of the three spreads.

    Args:
        df: DataFrame containing the columns ``M1_M2_spread``,
            ``M2_M3_spread`` and ``M3_M4_spread``.

    Returns:
        Tuple ``(pearson, spearman)`` of 3x3 correlation DataFrames.
    """
    spreads = df[['M1_M2_spread', 'M2_M3_spread', 'M3_M4_spread']]

    # (pandas method name, heading printed above the matrix)
    reports = (
        ('pearson', "📊 隣月間スプレッド相関行列:"),
        ('spearman', "\n📈 Spearman順位相関行列:"),
    )

    matrices = {}
    for method, heading in reports:
        matrix = spreads.corr(method=method)
        print(heading)
        print("=" * 40)
        print(matrix.round(4))
        matrices[method] = matrix

    return matrices['pearson'], matrices['spearman']

def plot_correlation_heatmap(corr_matrix, spearman_corr):
    """Draw the Pearson and Spearman matrices as side-by-side heatmaps.

    Args:
        corr_matrix: Pearson correlation DataFrame (left panel).
        spearman_corr: Spearman correlation DataFrame (right panel).

    Returns:
        The plotly figure holding both heatmaps.
    """
    axis_labels = ['M1-M2', 'M2-M3', 'M3-M4']

    fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=('Pearson相関', 'Spearman順位相関'),
        horizontal_spacing=0.15,
    )

    # (matrix, subplot column, x position of that panel's colorbar)
    panels = (
        (corr_matrix, 1, 0.45),
        (spearman_corr, 2, 1.02),
    )
    for matrix, column, colorbar_x in panels:
        values = matrix.values
        heatmap = go.Heatmap(
            z=values,
            x=axis_labels,
            y=axis_labels,
            colorscale='RdBu',
            zmid=0,
            zmin=-1,
            zmax=1,
            text=np.round(values, 3),
            texttemplate="%{text}",
            textfont={"size": 14},
            showscale=True,
            colorbar=dict(x=colorbar_x),
        )
        fig.add_trace(heatmap, row=1, col=column)

    fig.update_layout(
        title=dict(text="隣月間スプレッド相関分析", x=0.5, font=dict(size=16)),
        height=500,
        width=900,
    )

    return fig

# Run the basic correlation analysis.
pearson_corr, spearman_corr = analyze_basic_correlations(spreads_data)

# Build and display the correlation heatmaps.
corr_chart = plot_correlation_heatmap(pearson_corr, spearman_corr)
corr_chart.show()

# Save the figure as a PNG, creating the output directory if needed.
os.makedirs('../generated_images', exist_ok=True)
corr_chart.write_image('../generated_images/adjacent_spreads_correlation_heatmap.png', 
                      width=900, height=500, scale=2)

## 2. 時間変動相関分析

In [None]:
def calculate_rolling_correlations(df, window=60):
    """Compute rolling correlations between each pair of spreads.

    Args:
        df: DataFrame with the three ``*_spread`` columns.
        window: Rolling window length in rows.

    Returns:
        DataFrame with columns ``M1M2_vs_M2M3``, ``M2M3_vs_M3M4`` and
        ``M1M2_vs_M3M4``; rows with any missing value are dropped.
    """
    # Output column -> (left spread, right spread); insertion order fixes
    # the column order of the result.
    pair_columns = {
        'M1M2_vs_M2M3': ('M1_M2_spread', 'M2_M3_spread'),
        'M2M3_vs_M3M4': ('M2_M3_spread', 'M3_M4_spread'),
        'M1M2_vs_M3M4': ('M1_M2_spread', 'M3_M4_spread'),
    }

    result = pd.DataFrame(index=df.index)
    for label, (left, right) in pair_columns.items():
        result[label] = df[left].rolling(window=window).corr(df[right])

    return result.dropna()

def plot_rolling_correlations(rolling_corr_df, window=60):
    """Plot the rolling correlation series as one line chart.

    Args:
        rolling_corr_df: DataFrame with the columns ``M1M2_vs_M2M3``,
            ``M2M3_vs_M3M4`` and ``M1M2_vs_M3M4``.
        window: Window length shown in the chart title. It defaults to 60,
            which reproduces the original fixed title; pass the value used
            to compute ``rolling_corr_df`` so the title stays accurate.

    Returns:
        The plotly figure.
    """
    fig = go.Figure()

    # (column, legend label, line colour)
    series_specs = [
        ('M1M2_vs_M2M3', 'M1-M2 vs M2-M3', 'blue'),
        ('M2M3_vs_M3M4', 'M2-M3 vs M3-M4', 'red'),
        ('M1M2_vs_M3M4', 'M1-M2 vs M3-M4', 'green'),
    ]
    for col, name, color in series_specs:
        fig.add_trace(
            go.Scatter(
                x=rolling_corr_df.index,
                y=rolling_corr_df[col],
                name=name,
                line=dict(color=color, width=2),
                mode='lines',
            )
        )

    # Reference lines at zero and at +/-0.5.
    fig.add_hline(y=0, line_dash="dash", line_color="black", line_width=1)
    fig.add_hline(y=0.5, line_dash="dot", line_color="gray", line_width=1)
    fig.add_hline(y=-0.5, line_dash="dot", line_color="gray", line_width=1)

    fig.update_layout(
        title=dict(
            text=f"隣月間スプレッド ローリング相関（{window}日）",
            x=0.5,
            font=dict(size=16),
        ),
        xaxis_title="日付",
        yaxis_title="相関係数",
        height=500,
        showlegend=True,
        yaxis=dict(range=[-1, 1]),
    )

    return fig

# Compute the rolling correlations with a 60-row window.
rolling_corr_data = calculate_rolling_correlations(spreads_data, window=60)

# Print summary statistics of the rolling correlation series.
print(f"📈 ローリング相関統計（60日窓）:")
print("=" * 50)
print(rolling_corr_data.describe().round(4))

# Build and display the rolling correlation chart.
rolling_corr_chart = plot_rolling_correlations(rolling_corr_data)
rolling_corr_chart.show()

# Save the figure as a PNG image.
rolling_corr_chart.write_image('../generated_images/adjacent_spreads_rolling_correlation.png', 
                              width=1200, height=500, scale=2)

## 3. 共和分分析（長期均衡関係）

In [None]:
def test_cointegration(df):
    """Run pairwise cointegration tests on the three spread series.

    Each pair is passed to ``coint`` and the statistic, p-value and critical
    values are printed. A pair is reported as cointegrated when its p-value
    is below 0.05.

    Args:
        df: DataFrame with the three ``*_spread`` columns.

    Returns:
        Dict keyed by ``"<name1>_vs_<name2>"`` (e.g. ``"M1-M2_vs_M2-M3"``),
        each value holding ``test_statistic``, ``p_value`` and
        ``critical_values``.
    """
    print("🔬 共和分検定結果:")
    print("=" * 60)

    # Short display label for each spread column.
    display_name = {
        'M1_M2_spread': 'M1-M2',
        'M2_M3_spread': 'M2-M3',
        'M3_M4_spread': 'M3-M4',
    }
    # Tested in this order; it also fixes the key order of the result.
    tested_pairs = [
        ('M1_M2_spread', 'M2_M3_spread'),
        ('M2_M3_spread', 'M3_M4_spread'),
        ('M1_M2_spread', 'M3_M4_spread'),
    ]

    results = {}
    for first_col, second_col in tested_pairs:
        first_name = display_name[first_col]
        second_name = display_name[second_col]

        statistic, p_val, crit = coint(df[first_col], df[second_col])
        results[f'{first_name}_vs_{second_name}'] = {
            'test_statistic': statistic,
            'p_value': p_val,
            'critical_values': crit,
        }

        if p_val < 0.05:
            verdict = '共和分関係あり'
        else:
            verdict = '共和分関係なし'

        print(f"\n{first_name} vs {second_name}:")
        print(f"  検定統計量: {statistic:.4f}")
        print(f"  p値: {p_val:.4f}")
        print(f"  結果: {verdict}")
        print(f"  臨界値 1%: {crit[0]:.4f}")
        print(f"  臨界値 5%: {crit[1]:.4f}")
        print(f"  臨界値 10%: {crit[2]:.4f}")

    return results

def johansen_cointegration_test(df):
    """Run the multivariate Johansen test on the three spreads and print it.

    The test is run with ``det_order=0`` and ``k_ar_diff=1``. Trace and
    maximum-eigenvalue statistics are printed next to their critical values,
    and the trace statistic is compared against the 95% value.

    Args:
        df: DataFrame with the three ``*_spread`` columns.

    Returns:
        The result object returned by ``coint_johansen``.
    """
    series = df[['M1_M2_spread', 'M2_M3_spread', 'M3_M4_spread']].dropna()

    print("\n🔍 Johansen共和分検定（多変量）:")
    print("=" * 60)

    result = coint_johansen(series, det_order=0, k_ar_diff=1)

    print("Trace統計量:")
    for rank, stat in enumerate(result.lr1):
        # Critical-value columns are read in the order 90%, 95%, 99%.
        cv_90 = result.cvt[rank, 0]
        cv_95 = result.cvt[rank, 1]
        cv_99 = result.cvt[rank, 2]
        print(f"  r≤{rank}: {stat:.4f} (90%: {cv_90:.4f}, 95%: {cv_95:.4f}, 99%: {cv_99:.4f})")

        if stat > cv_95:
            print(f"    → r>{rank}の共和分関係あり（95%水準）")
        else:
            print(f"    → r≤{rank}の共和分関係")

    print("\nMaximum Eigenvalue統計量:")
    for rank, stat in enumerate(result.lr2):
        cv_90 = result.cvm[rank, 0]
        cv_95 = result.cvm[rank, 1]
        cv_99 = result.cvm[rank, 2]
        print(f"  r={rank}: {stat:.4f} (90%: {cv_90:.4f}, 95%: {cv_95:.4f}, 99%: {cv_99:.4f})")

    return result

# Run the pairwise tests and the multivariate Johansen test.
coint_results = test_cointegration(spreads_data)
johansen_result = johansen_cointegration_test(spreads_data)

In [None]:
def analyze_cointegration_relationships(df, coint_results):
    """Analyse the most significant cointegrated spread pair in detail.

    Pairs with a p-value below 0.05 are listed. The pair with the smallest
    p-value is then regressed by OLS (first spread on a constant and the
    second spread), and the regression residuals are checked with ``adfuller``.

    Args:
        df: DataFrame with the ``*_spread`` columns.
        coint_results: Dict keyed by ``"<name1>_vs_<name2>"`` (for example
            ``"M1-M2_vs_M2-M3"``) whose values contain a ``p_value`` entry.

    Returns:
        ``None`` when no pair is significant; otherwise a dict with
        ``best_pair`` (column names), ``regression_model``, ``residuals``
        and ``adf_test`` (statistic, p-value).

    Raises:
        ValueError: If the best pair's key does not contain ``_vs_``.
        KeyError: If a column derived from the pair key is missing from ``df``.
    """
    print(f"\n📊 共和分関係の詳細分析:")
    print("=" * 60)

    # Keep only the pairs that are significant at the 5% level.
    significant_pairs = [
        name for name, result in coint_results.items() if result['p_value'] < 0.05
    ]
    for pair_name in significant_pairs:
        print(f"✅ {pair_name}: 共和分関係あり（p={coint_results[pair_name]['p_value']:.4f}）")

    if not significant_pairs:
        print("❌ 有意な共和分関係は検出されませんでした")
        return None

    # The smallest p-value overall is necessarily among the significant pairs.
    best_pair_name = min(significant_pairs, key=lambda name: coint_results[name]['p_value'])
    print(f"\n🎯 最強共和分関係: {best_pair_name}")

    # Derive the column names from the key, e.g. 'M1-M2' -> 'M1_M2_spread',
    # instead of silently falling back to a default pair for unknown keys.
    name1, separator, name2 = best_pair_name.partition('_vs_')
    if not separator:
        raise ValueError(f"unexpected pair name: {best_pair_name!r}")
    col1 = f"{name1.replace('-', '_')}_spread"
    col2 = f"{name2.replace('-', '_')}_spread"

    # Cointegrating regression: col1 = const + beta * col2 + residual.
    y = df[col1]
    x = sm.add_constant(df[col2])
    model = OLS(y, x).fit()

    # Read the coefficients by position through an array, because integer
    # indexing of a label-indexed Series is deprecated in pandas.
    intercept, slope = np.asarray(model.params)[:2]
    slope_t_value = np.asarray(model.tvalues)[1]

    print(f"\n回帰式: {name1} = {intercept:.4f} + {slope:.4f} * {name2}")
    print(f"R²: {model.rsquared:.4f}")
    print(f"回帰係数のt値: {slope_t_value:.4f}")

    # Residuals serve as the error-correction term.
    residuals = model.resid

    # NOTE(review): standard ADF p-values are presumably not exact for
    # residuals of an estimated regression; treat the p-value stored in
    # coint_results as the formal test and this one as a diagnostic.
    adf_stat, adf_p, *_ = adfuller(residuals)

    print(f"\n残差の定常性検定（ADF）:")
    print(f"  統計量: {adf_stat:.4f}")
    print(f"  p値: {adf_p:.4f}")
    print(f"  結果: {'定常' if adf_p < 0.05 else '非定常'}")

    return {
        'best_pair': (col1, col2),
        'regression_model': model,
        'residuals': residuals,
        'adf_test': (adf_stat, adf_p)
    }

# Detailed analysis of the cointegration results (None when nothing is significant).
coint_analysis = analyze_cointegration_relationships(spreads_data, coint_results)

In [None]:
def plot_cointegration_analysis(df, coint_analysis):
    """Visualise the cointegration analysis in a 2x2 figure.

    Panels: the two spreads over time, their scatter plot with the fitted
    regression line, the regression residuals over time, and a histogram of
    the residuals.

    Args:
        df: DataFrame with the spread columns named in ``best_pair``.
        coint_analysis: Dict with ``best_pair``, ``residuals`` and
            ``regression_model``, or ``None``.

    Returns:
        The plotly figure, or ``None`` when ``coint_analysis`` is ``None``.
    """
    if coint_analysis is None:
        print("共和分関係が検出されなかったため、プロットをスキップします")
        return None

    col1, col2 = coint_analysis['best_pair']
    residuals = coint_analysis['residuals']
    model = coint_analysis['regression_model']

    # Display labels, e.g. 'M1_M2_spread' -> 'M1-M2'.
    label1 = col1.replace('_spread', '').replace('_', '-')
    label2 = col2.replace('_spread', '').replace('_', '-')

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            'スプレッドペアの時系列',
            'スプレッド散布図と回帰線',
            '誤差修正項（残差）の推移',
            '残差の分布'
        ),
        specs=[[{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": False}, {"secondary_y": False}]]
    )

    # 1. Time series of both spreads.
    for column, label, color in ((col1, label1, 'blue'), (col2, label2, 'red')):
        fig.add_trace(
            go.Scatter(
                x=df.index,
                y=df[column],
                name=label,
                line=dict(color=color, width=1)
            ),
            row=1, col=1
        )

    # 2. Scatter plot of the pair.
    fig.add_trace(
        go.Scatter(
            x=df[col2],
            y=df[col1],
            mode='markers',
            name='データポイント',
            marker=dict(color='lightblue', size=3, opacity=0.6)
        ),
        row=1, col=2
    )

    # Fitted line. Coefficients are read by position through an array,
    # because integer indexing of a label-indexed Series is deprecated.
    intercept, slope = np.asarray(model.params)[:2]
    x_range = np.linspace(df[col2].min(), df[col2].max(), 100)
    y_pred = intercept + slope * x_range

    fig.add_trace(
        go.Scatter(
            x=x_range,
            y=y_pred,
            mode='lines',
            name='回帰線',
            line=dict(color='red', width=2)
        ),
        row=1, col=2
    )

    # 3. Residuals over time, with a zero reference line.
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=residuals,
            name='誤差修正項',
            line=dict(color='green', width=1)
        ),
        row=2, col=1
    )
    fig.add_hline(y=0, line_dash="dash", line_color="black", line_width=1, row=2, col=1)

    # 4. Histogram of the residuals.
    fig.add_trace(
        go.Histogram(
            x=residuals,
            name='残差分布',
            nbinsx=50,
            marker_color='purple',
            opacity=0.7
        ),
        row=2, col=2
    )

    fig.update_layout(
        title=dict(
            text="共和分分析 - 長期均衡関係の可視化",
            x=0.5,
            font=dict(size=16)
        ),
        height=800,
        showlegend=True
    )

    # Axis titles for each panel.
    fig.update_yaxes(title_text="スプレッド (USD/t)", row=1, col=1)
    fig.update_xaxes(title_text="日付", row=1, col=1)

    fig.update_xaxes(title_text=f"{label2} スプレッド", row=1, col=2)
    fig.update_yaxes(title_text=f"{label1} スプレッド", row=1, col=2)

    fig.update_yaxes(title_text="残差", row=2, col=1)
    fig.update_xaxes(title_text="日付", row=2, col=1)

    fig.update_xaxes(title_text="残差", row=2, col=2)
    fig.update_yaxes(title_text="頻度", row=2, col=2)

    return fig

# Draw the cointegration figure only when the detailed analysis returned a result.
if coint_analysis:
    coint_chart = plot_cointegration_analysis(spreads_data, coint_analysis)
    if coint_chart:
        coint_chart.show()
        
        # Save the figure as a PNG image.
        coint_chart.write_image('../generated_images/adjacent_spreads_cointegration_analysis.png', 
                               width=1200, height=800, scale=2)

## 4. 主成分分析（PCA）

In [None]:
def perform_pca_analysis(df):
    """Run PCA on the standardised spread series and print the results.

    Args:
        df: DataFrame with the three ``*_spread`` columns.

    Returns:
        Dict with ``pca_model``, ``explained_variance_ratio``,
        ``cumulative_variance_ratio``, ``loadings`` (DataFrame indexed by
        spread label), ``scores`` (DataFrame indexed like the input rows)
        and ``scaler``.
    """
    spread_columns = ['M1_M2_spread', 'M2_M3_spread', 'M3_M4_spread']
    component_names = [f'PC{k}' for k in range(1, len(spread_columns) + 1)]

    observations = df[spread_columns].dropna()

    # Standardise each spread before extracting the components.
    scaler = StandardScaler()
    standardised = scaler.fit_transform(observations)

    pca = PCA()
    scores = pca.fit_transform(standardised)

    print("📊 主成分分析結果:")
    print("=" * 50)

    ratios = pca.explained_variance_ratio_
    cumulative = np.cumsum(ratios)

    for number, (ratio, running_total) in enumerate(zip(ratios, cumulative), start=1):
        print(
            f"PC{number}: 寄与率 {ratio:.4f} ({ratio*100:.2f}%), "
            f"累積寄与率 {running_total:.4f} ({running_total*100:.2f}%)"
        )

    # Loadings: component vectors scaled by the root of their variance.
    print("\n📈 主成分負荷量:")
    loadings_df = pd.DataFrame(
        pca.components_.T * np.sqrt(pca.explained_variance_),
        index=['M1-M2', 'M2-M3', 'M3-M4'],
        columns=component_names,
    )
    print(loadings_df.round(4))

    scores_df = pd.DataFrame(scores, index=observations.index, columns=component_names)

    return {
        'pca_model': pca,
        'explained_variance_ratio': ratios,
        'cumulative_variance_ratio': cumulative,
        'loadings': loadings_df,
        'scores': scores_df,
        'scaler': scaler,
    }

def plot_pca_analysis(pca_results):
    """Visualise the PCA results in a 2x2 figure.

    Panels: explained variance per component with the cumulative line, a
    heatmap of the loadings, a PC1/PC2 score scatter coloured by year, and
    the PC1 score over time.

    Args:
        pca_results: Dict with ``explained_variance_ratio``,
            ``cumulative_variance_ratio``, ``loadings`` and ``scores``.

    Returns:
        The plotly figure.
    """
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            '寄与率（スクリープロット）',
            '主成分負荷量',
            '第1・第2主成分スコア',
            '第1主成分の時系列'
        )
    )

    loadings = pca_results['loadings']
    scores = pca_results['scores']

    # 1. Scree plot.
    pcs = [f'PC{i+1}' for i in range(len(pca_results['explained_variance_ratio']))]

    fig.add_trace(
        go.Bar(
            x=pcs,
            y=pca_results['explained_variance_ratio'] * 100,
            name='寄与率',
            marker_color='lightblue'
        ),
        row=1, col=1
    )

    # The cumulative line shares the bar chart's percentage axis. No explicit
    # yaxis is set: placing the trace with row/col decides its axes.
    fig.add_trace(
        go.Scatter(
            x=pcs,
            y=pca_results['cumulative_variance_ratio'] * 100,
            name='累積寄与率',
            line=dict(color='red', width=2)
        ),
        row=1, col=1
    )

    # 2. Loadings heatmap. Its colorbar is limited to the upper half so it
    # does not overlap the score scatter's colorbar.
    fig.add_trace(
        go.Heatmap(
            z=loadings.values,
            x=loadings.columns,
            y=loadings.index,
            colorscale='RdBu',
            zmid=0,
            text=np.round(loadings.values, 3),
            texttemplate="%{text}",
            textfont={"size": 12},
            showscale=True,
            colorbar=dict(len=0.45, y=0.8)
        ),
        row=1, col=2
    )

    # 3. Score scatter, coloured by the year of each observation.
    fig.add_trace(
        go.Scatter(
            x=scores['PC1'],
            y=scores['PC2'],
            mode='markers',
            name='PC1 vs PC2',
            marker=dict(
                color=scores.index.map(lambda x: x.year),
                colorscale='Viridis',
                size=4,
                opacity=0.7,
                colorbar=dict(title="年", len=0.45, y=0.2)
            )
        ),
        row=2, col=1
    )

    # 4. PC1 over time, with a zero reference line.
    fig.add_trace(
        go.Scatter(
            x=scores.index,
            y=scores['PC1'],
            name='第1主成分',
            line=dict(color='blue', width=1)
        ),
        row=2, col=2
    )

    fig.add_hline(y=0, line_dash="dash", line_color="black", line_width=1, row=2, col=2)

    fig.update_layout(
        title=dict(
            text="隣月間スプレッド主成分分析",
            x=0.5,
            font=dict(size=16)
        ),
        height=800,
        showlegend=True
    )

    # Axis titles for each panel.
    fig.update_yaxes(title_text="寄与率 (%)", row=1, col=1)
    fig.update_xaxes(title_text="主成分", row=1, col=1)

    fig.update_yaxes(title_text="第2主成分", row=2, col=1)
    fig.update_xaxes(title_text="第1主成分", row=2, col=1)

    fig.update_yaxes(title_text="第1主成分スコア", row=2, col=2)
    fig.update_xaxes(title_text="日付", row=2, col=2)

    return fig

# Run the principal component analysis.
pca_results = perform_pca_analysis(spreads_data)

# Build and display the PCA figure.
pca_chart = plot_pca_analysis(pca_results)
pca_chart.show()

# Save the figure as a PNG image.
pca_chart.write_image('../generated_images/adjacent_spreads_pca_analysis.png', 
                     width=1200, height=800, scale=2)

## 5. 統計的裁定機会の特定

In [None]:
def _band_signals(series, entry_sigma=2.0, exit_sigma=0.5):
    """Build mean-reversion signals from sigma bands around the series mean.

    A value above ``mean + entry_sigma * std`` gives -1 (short), a value
    below ``mean - entry_sigma * std`` gives +1 (long), and a value within
    ``exit_sigma * std`` of the mean gives 0 (flat). Values in between keep
    the previous signal; leading gaps become 0.

    Returns:
        Tuple ``(signals, (lower_threshold, upper_threshold))``.
    """
    centre = series.mean()
    scale = series.std()
    upper = centre + entry_sigma * scale
    lower = centre - entry_sigma * scale

    signals = pd.Series(index=series.index, dtype=float)
    signals[series > upper] = -1
    signals[series < lower] = 1
    signals[abs(series - centre) < exit_sigma * scale] = 0

    # Carry the last explicit signal forward between the bands.
    signals = signals.ffill().fillna(0)
    return signals, (lower, upper)


def identify_statistical_arbitrage_opportunities(df, pca_results, coint_analysis):
    """Derive three kinds of trading signals from the earlier analyses.

    1. Pairs trading on the cointegration residuals (only when
       ``coint_analysis`` is given and its ADF p-value is below 0.05).
    2. Factor trading on the first principal component score.
    3. Breakdown detection on the 60-row rolling correlation between the
       M1-M2 and M2-M3 spreads of ``df``.

    Thresholds use the mean and standard deviation of the whole sample, so
    the signals are in-sample.

    Args:
        df: DataFrame with ``M1_M2_spread`` and ``M2_M3_spread`` columns.
        pca_results: Dict with ``scores`` (DataFrame with a ``PC1`` column)
            and ``explained_variance_ratio``.
        coint_analysis: Dict with ``adf_test``, ``residuals`` and
            ``best_pair``, or ``None``.

    Returns:
        Dict that may contain ``cointegration_pairs_trading`` and always
        contains ``pca_factor_trading`` and ``correlation_breakdown``.
    """
    entry_sigma = 2

    print(f"🎯 統計的裁定機会分析:")
    print("=" * 60)

    arbitrage_opportunities = {}

    # 1. Cointegration-based pairs trading.
    if coint_analysis and coint_analysis['adf_test'][1] < 0.05:
        residuals = coint_analysis['residuals']
        signals, (lower_threshold, upper_threshold) = _band_signals(residuals, entry_sigma)

        arbitrage_opportunities['cointegration_pairs_trading'] = {
            'pair': coint_analysis['best_pair'],
            'residuals': residuals,
            'signals': signals,
            'thresholds': (lower_threshold, upper_threshold),
            'signal_frequency': (signals != 0).sum() / len(signals) * 100
        }

        print(f"✅ 共和分ペアトレード機会:")
        print(f"   対象ペア: {coint_analysis['best_pair']}")
        print(f"   シグナル頻度: {arbitrage_opportunities['cointegration_pairs_trading']['signal_frequency']:.2f}%")
        print(f"   エントリー閾値: ±{entry_sigma:.1f}σ ({lower_threshold:.4f}, {upper_threshold:.4f})")

    # 2. Factor trading on extreme values of the first principal component.
    pc1_scores = pca_results['scores']['PC1']
    pc1_signals, (pc1_lower, pc1_upper) = _band_signals(pc1_scores, entry_sigma)

    arbitrage_opportunities['pca_factor_trading'] = {
        'pc1_scores': pc1_scores,
        'signals': pc1_signals,
        'thresholds': (pc1_lower, pc1_upper),
        'signal_frequency': (pc1_signals != 0).sum() / len(pc1_signals) * 100
    }

    print(f"\n✅ 主成分ファクター取引機会:")
    print(f"   第1主成分寄与率: {pca_results['explained_variance_ratio'][0]*100:.2f}%")
    print(f"   シグナル頻度: {arbitrage_opportunities['pca_factor_trading']['signal_frequency']:.2f}%")
    print(f"   エントリー閾値: ±{entry_sigma:.1f}σ ({pc1_lower:.4f}, {pc1_upper:.4f})")

    # 3. Correlation breakdown, computed from the df argument rather than
    # from a notebook-level global.
    rolling_corr_60d = df['M1_M2_spread'].rolling(window=60).corr(
        df['M2_M3_spread']
    ).dropna()

    corr_mean = rolling_corr_60d.mean()
    corr_std = rolling_corr_60d.std()

    # A breakdown is a correlation more than two sigmas below its mean.
    corr_breakdown_threshold = corr_mean - 2 * corr_std
    correlation_breakdowns = rolling_corr_60d < corr_breakdown_threshold

    arbitrage_opportunities['correlation_breakdown'] = {
        'rolling_correlation': rolling_corr_60d,
        'breakdown_threshold': corr_breakdown_threshold,
        'breakdown_periods': correlation_breakdowns,
        'breakdown_frequency': correlation_breakdowns.sum() / len(correlation_breakdowns) * 100
    }

    print(f"\n✅ 相関ブレイクダウン機会:")
    print(f"   平均相関: {corr_mean:.4f}")
    print(f"   ブレイクダウン閾値: {corr_breakdown_threshold:.4f}")
    print(f"   ブレイクダウン頻度: {arbitrage_opportunities['correlation_breakdown']['breakdown_frequency']:.2f}%")

    return arbitrage_opportunities

# Derive the statistical arbitrage signals from the spreads, PCA and cointegration results.
arbitrage_opps = identify_statistical_arbitrage_opportunities(
    spreads_data, pca_results, coint_analysis
)

In [None]:
def plot_arbitrage_opportunities(arbitrage_opps):
    """Plot the three families of arbitrage signals in stacked panels.

    Row 1 shows the cointegration residuals with entry thresholds and
    buy/sell markers, row 2 the PC1 score with its thresholds, and row 3 the
    rolling correlation with its breakdown threshold and breakdown markers.
    A panel stays empty when its key is absent from ``arbitrage_opps``.

    Args:
        arbitrage_opps: Dict that may contain ``cointegration_pairs_trading``,
            ``pca_factor_trading`` and ``correlation_breakdown``.

    Returns:
        The plotly figure.
    """
    fig = make_subplots(
        rows=3,
        cols=1,
        subplot_titles=(
            '共和分ペアトレードシグナル',
            '主成分ファクター取引シグナル',
            '相関ブレイクダウン機会',
        ),
        vertical_spacing=0.08,
    )

    def draw_level(panel_row, level, dash, color, width=1):
        # Horizontal reference line in the given panel.
        fig.add_hline(y=level, line_dash=dash, line_color=color,
                      line_width=width, row=panel_row, col=1)

    # Row 1: cointegration pairs trading.
    if 'cointegration_pairs_trading' in arbitrage_opps:
        pairs = arbitrage_opps['cointegration_pairs_trading']
        resid = pairs['residuals']
        positions = pairs['signals']
        low, high = pairs['thresholds']

        fig.add_trace(
            go.Scatter(x=resid.index, y=resid, name='誤差修正項',
                       line=dict(color='blue', width=1)),
            row=1, col=1,
        )

        draw_level(1, high, "dash", "red")
        draw_level(1, low, "dash", "green")
        draw_level(1, 0, "dot", "black")

        # (signal value, legend label, colour, marker symbol)
        marker_specs = (
            (1, '買いシグナル', 'green', 'triangle-up'),
            (-1, '売りシグナル', 'red', 'triangle-down'),
        )
        for value, label, color, symbol in marker_specs:
            hits = positions[positions == value]
            if len(hits) == 0:
                continue
            fig.add_trace(
                go.Scatter(
                    x=hits.index,
                    y=resid.loc[hits.index],
                    mode='markers',
                    name=label,
                    marker=dict(color=color, size=8, symbol=symbol),
                ),
                row=1, col=1,
            )

    # Row 2: principal component factor trading.
    if 'pca_factor_trading' in arbitrage_opps:
        factor = arbitrage_opps['pca_factor_trading']
        scores = factor['pc1_scores']
        low, high = factor['thresholds']

        fig.add_trace(
            go.Scatter(x=scores.index, y=scores, name='第1主成分',
                       line=dict(color='purple', width=1)),
            row=2, col=1,
        )

        draw_level(2, high, "dash", "red")
        draw_level(2, low, "dash", "green")
        draw_level(2, 0, "dot", "black")

    # Row 3: correlation breakdown.
    if 'correlation_breakdown' in arbitrage_opps:
        breakdown = arbitrage_opps['correlation_breakdown']
        corr_series = breakdown['rolling_correlation']
        flags = breakdown['breakdown_periods']

        fig.add_trace(
            go.Scatter(x=corr_series.index, y=corr_series, name='60日ローリング相関',
                       line=dict(color='orange', width=1)),
            row=3, col=1,
        )

        draw_level(3, breakdown['breakdown_threshold'], "dash", "red", width=2)

        # Mark the dates flagged as breakdowns.
        flagged_dates = flags[flags].index
        if len(flagged_dates) > 0:
            fig.add_trace(
                go.Scatter(
                    x=flagged_dates,
                    y=corr_series.loc[flagged_dates],
                    mode='markers',
                    name='ブレイクダウン',
                    marker=dict(color='red', size=6),
                ),
                row=3, col=1,
            )

    fig.update_layout(
        title=dict(text="統計的裁定機会の特定", x=0.5, font=dict(size=16)),
        height=1000,
        showlegend=True,
    )

    # Axis titles.
    fig.update_yaxes(title_text="残差", row=1, col=1)
    fig.update_yaxes(title_text="PC1スコア", row=2, col=1)
    fig.update_yaxes(title_text="相関係数", row=3, col=1)
    fig.update_xaxes(title_text="日付", row=3, col=1)

    return fig

# Build and display the arbitrage opportunity figure.
arbitrage_chart = plot_arbitrage_opportunities(arbitrage_opps)
arbitrage_chart.show()

# Save the figure as a PNG image.
arbitrage_chart.write_image('../generated_images/adjacent_spreads_arbitrage_opportunities.png', 
                           width=1200, height=1000, scale=2)

## 6. 分析結果サマリー

In [None]:
# 包括的分析サマリー
def generate_correlation_analysis_summary(pearson_corr, coint_results, pca_results, arbitrage_opps):
    """Print a summary of all analyses and return the key figures.

    Args:
        pearson_corr: Correlation DataFrame indexed by the ``*_spread`` names.
        coint_results: Dict of pair name -> dict containing ``p_value``.
        pca_results: Dict with ``explained_variance_ratio``,
            ``cumulative_variance_ratio`` and ``loadings`` (with ``PC1``).
        arbitrage_opps: Dict that may contain ``cointegration_pairs_trading``,
            ``pca_factor_trading`` and ``correlation_breakdown``.

    Returns:
        Dict with ``max_correlation`` (pair label, absolute value),
        ``cointegrated_pairs``, ``pca_pc1_contribution``,
        ``arbitrage_opportunities`` and ``average_correlation``. Numeric
        values are plain Python numbers.
    """
    print("📋 隣月間スプレッド相関・共和分分析サマリー")
    print("=" * 70)

    print(f"\n🔗 相関構造:")
    print(f"  M1-M2 vs M2-M3: {pearson_corr.loc['M1_M2_spread', 'M2_M3_spread']:.3f}")
    print(f"  M2-M3 vs M3-M4: {pearson_corr.loc['M2_M3_spread', 'M3_M4_spread']:.3f}")
    print(f"  M1-M2 vs M3-M4: {pearson_corr.loc['M1_M2_spread', 'M3_M4_spread']:.3f}")

    # Absolute correlations of the upper triangle (each pair once), reused
    # for both the strongest pair and the average.
    size = len(pearson_corr)
    pair_abs_corr = [
        (i, j, abs(pearson_corr.iloc[i, j]))
        for i in range(size)
        for j in range(i + 1, size)
    ]

    # Strongest pair; the first one wins on ties.
    max_corr = 0
    max_pair = ""
    for i, j, corr_val in pair_abs_corr:
        if corr_val > max_corr:
            max_corr = corr_val
            max_pair = f"{pearson_corr.index[i]} vs {pearson_corr.columns[j]}"

    print(f"  最強相関ペア: {max_pair} ({max_corr:.3f})")

    print(f"\n🎯 共和分分析:")
    coint_pairs = 0
    for pair_name, result in coint_results.items():
        if result['p_value'] < 0.05:
            coint_pairs += 1
            print(f"  ✅ {pair_name}: p={result['p_value']:.4f} (共和分関係あり)")
        else:
            print(f"  ❌ {pair_name}: p={result['p_value']:.4f} (共和分関係なし)")

    pc1_ratio = pca_results['explained_variance_ratio'][0]

    print(f"\n📊 主成分分析:")
    print(f"  第1主成分寄与率: {pc1_ratio*100:.2f}%")
    print(f"  第2主成分寄与率: {pca_results['explained_variance_ratio'][1]*100:.2f}%")
    print(f"  累積寄与率（PC1+PC2）: {pca_results['cumulative_variance_ratio'][1]*100:.2f}%")

    # Spread with the largest absolute loading on PC1.
    pc1_loadings = pca_results['loadings']['PC1']
    dominant_component = pc1_loadings.abs().idxmax()
    print(f"  第1主成分の支配的要素: {dominant_component} ({pc1_loadings[dominant_component]:.3f})")

    print(f"\n💰 統計的裁定機会:")
    total_opportunities = 0

    if 'cointegration_pairs_trading' in arbitrage_opps:
        coint_freq = arbitrage_opps['cointegration_pairs_trading']['signal_frequency']
        print(f"  共和分ペアトレード: {coint_freq:.2f}% のシグナル頻度")
        total_opportunities += 1

    if 'pca_factor_trading' in arbitrage_opps:
        pca_freq = arbitrage_opps['pca_factor_trading']['signal_frequency']
        print(f"  主成分ファクター取引: {pca_freq:.2f}% のシグナル頻度")
        total_opportunities += 1

    if 'correlation_breakdown' in arbitrage_opps:
        breakdown_freq = arbitrage_opps['correlation_breakdown']['breakdown_frequency']
        print(f"  相関ブレイクダウン: {breakdown_freq:.2f}% の発生頻度")
        total_opportunities += 1

    print(f"\n💡 投資戦略への示唆:")

    if coint_pairs > 0:
        print(f"  • 共和分関係を利用した平均回帰戦略が有効")
        print(f"  • 長期均衡からの乖離を狙ったペアトレードが可能")

    if pc1_ratio > 0.6:
        print(f"  • 第1主成分が高寄与率（{pc1_ratio*100:.1f}%）")
        print(f"  • システマティックリスクファクターとして活用可能")

    print(f"  • {total_opportunities}種類の統計的裁定戦略が実装可能")
    print(f"  • 相関構造の時間変動を活用した動的ヘッジ戦略")

    avg_corr = np.mean([corr_val for _, _, corr_val in pair_abs_corr])

    # High average correlation means the spreads move together, so the
    # diversification benefit is limited, not high.
    print(f"\n⚠️ リスク管理:")
    print(f"  • 平均相関: {avg_corr:.3f} - {'限定的な' if avg_corr > 0.5 else '中程度の'}分散効果")

    if max_corr > 0.8:
        print(f"  • 一部ペアで高相関（{max_corr:.3f}） - 集中リスクに注意")

    print(f"  • 相関ブレイクダウン時の損失拡大リスクを考慮")
    print(f"  • 複数戦略の組み合わせによるリスク分散推奨")

    # Plain floats keep the summary serialisable regardless of numpy dtype.
    return {
        'max_correlation': (max_pair, float(max_corr)),
        'cointegrated_pairs': coint_pairs,
        'pca_pc1_contribution': float(pc1_ratio),
        'arbitrage_opportunities': total_opportunities,
        'average_correlation': float(avg_corr)
    }

# Print the overall summary and keep the returned key figures.
correlation_summary = generate_correlation_analysis_summary(
    pearson_corr, coint_results, pca_results, arbitrage_opps
)

In [None]:
# 分析結果の保存
def save_correlation_analysis_results(pearson_corr, spearman_corr, pca_results, 
                                     arbitrage_opps, correlation_summary):
    """Write the analysis outputs to ``../analysis_results/adjacent_spreads``.

    CSV files are written for both correlation matrices, the PCA loadings,
    scores and explained variance, and for each signal series present in
    ``arbitrage_opps``. The summary dict is written as JSON. The paths of
    the written files are printed at the end.

    Args:
        pearson_corr: Pearson correlation DataFrame.
        spearman_corr: Spearman correlation DataFrame.
        pca_results: Dict with ``loadings``, ``scores``,
            ``explained_variance_ratio`` and ``cumulative_variance_ratio``.
        arbitrage_opps: Dict that may contain ``cointegration_pairs_trading``
            and ``pca_factor_trading`` entries holding ``signals``.
        correlation_summary: Dict dumped to JSON.
    """
    import json

    out_dir = '../analysis_results/adjacent_spreads'
    os.makedirs(out_dir, exist_ok=True)

    has_coint_signals = 'cointegration_pairs_trading' in arbitrage_opps
    has_pca_signals = 'pca_factor_trading' in arbitrage_opps

    # 1-2. Correlation matrices and PCA tables (row index kept).
    indexed_tables = [
        (pearson_corr, 'pearson_correlation.csv'),
        (spearman_corr, 'spearman_correlation.csv'),
        (pca_results['loadings'], 'pca_loadings.csv'),
        (pca_results['scores'], 'pca_scores.csv'),
    ]
    for table, filename in indexed_tables:
        table.to_csv(f'{out_dir}/{filename}', encoding='utf-8-sig')

    # 3. Explained variance per component (no row index).
    ratios = pca_results['explained_variance_ratio']
    variance_df = pd.DataFrame({
        'Component': [f'PC{k}' for k in range(1, len(ratios) + 1)],
        'Explained_Variance_Ratio': ratios,
        'Cumulative_Variance_Ratio': pca_results['cumulative_variance_ratio'],
    })
    variance_df.to_csv(f'{out_dir}/pca_variance_explained.csv',
                       encoding='utf-8-sig', index=False)

    # 4. Signal series, when available.
    if has_coint_signals:
        arbitrage_opps['cointegration_pairs_trading']['signals'].to_csv(
            f'{out_dir}/cointegration_signals.csv', encoding='utf-8-sig')

    if has_pca_signals:
        arbitrage_opps['pca_factor_trading']['signals'].to_csv(
            f'{out_dir}/pca_factor_signals.csv', encoding='utf-8-sig')

    # 5. Summary as JSON.
    with open(f'{out_dir}/correlation_analysis_summary.json', 'w', encoding='utf-8') as f:
        json.dump(correlation_summary, f, ensure_ascii=False, indent=2)

    # Report what was written.
    report = [
        "\n💾 相関分析結果を保存しました:",
        f"  📊 Pearson相関: {out_dir}/pearson_correlation.csv",
        f"  📈 Spearman相関: {out_dir}/spearman_correlation.csv",
        f"  🎯 PCA負荷量: {out_dir}/pca_loadings.csv",
        f"  📉 PCAスコア: {out_dir}/pca_scores.csv",
        f"  📋 寄与率: {out_dir}/pca_variance_explained.csv",
    ]
    if has_coint_signals:
        report.append(f"  💰 共和分シグナル: {out_dir}/cointegration_signals.csv")
    if has_pca_signals:
        report.append(f"  🔧 PCAシグナル: {out_dir}/pca_factor_signals.csv")
    report.append(f"  📝 分析サマリー: {out_dir}/correlation_analysis_summary.json")

    for line in report:
        print(line)

# Persist the correlation, PCA and signal outputs together with the summary.
save_correlation_analysis_results(
    pearson_corr, spearman_corr, pca_results, arbitrage_opps, correlation_summary
)

## 次のステップ

この相関・共和分分析により、隣月間スプレッドの詳細な関係性を把握しました。

### 主要発見事項
1. **相関構造**: 隣接スプレッド間に中程度から強い相関関係
2. **共和分関係**: 一部ペアで長期均衡関係を確認
3. **主成分構造**: 第1主成分が全変動の大部分を説明
4. **裁定機会**: 複数の統計的裁定戦略が実装可能

### 次の分析ステップ
1. **ボラティリティモデリング**: GARCH系モデルによるリスク分析
2. **機械学習予測**: より高度なパターン認識と予測
3. **取引戦略構築**: 実際の売買ルールとリスク管理
4. **バックテスト実行**: 歴史的データでの戦略検証
5. **パフォーマンス評価**: リスク調整後リターンの評価

次のノートブック `3_adjacent_spreads_volatility_modeling.ipynb` で、ボラティリティクラスタリングとリスク特性の詳細分析を実施します。