# Cross-Impact Analysis of Order Flow Imbalance
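This notebook loads order-book data for ten stocks, computes multi-level and integrated order flow imbalance (OFI), fits the contemporaneous and predictive cross-impact models, and saves the resulting figures under `../results`, following the methodology of the reference paper.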

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

sys.path.append('../scripts')

from data_preprocessing import DataPreprocessor
from ofi_calculator import OFICalculator
from cross_impact_analyzer import CrossImpactAnalyzer

# Global plot appearance used by every figure produced in this notebook.
plt.style.use('seaborn-v0_8')
plt.rcParams['figure.figsize'] = [12, 8]

# Create the output folders up front so that the savefig calls further down
# never fail on a missing directory.
for results_dir in (
    '../results/correlations',
    '../results/impact_analysis',
    '../results/multi_level_impacts',
):
    Path(results_dir).mkdir(parents=True, exist_ok=True)

In [2]:
# Tickers analysed in this notebook. The order of this list fixes the row and
# column order of every per-stock figure.
STOCKS = [
    'AAPL', 'MSFT', 'NVDA',
    'AMGN', 'GILD',
    'TSLA', 'PEP',
    'JPM', 'V',
    'XOM',
]

# Sector membership written sector-first; it is inverted below into the
# ticker -> sector lookup that the plotting functions read.
# NOTE(review): PEP is grouped with TSLA under 'Consumer Discretionary';
# it is usually classified as Consumer Staples - confirm this is intended.
_SECTOR_MEMBERS = {
    'Tech': ('AAPL', 'MSFT', 'NVDA'),
    'Healthcare': ('AMGN', 'GILD'),
    'Consumer Discretionary': ('TSLA', 'PEP'),
    'Financials': ('JPM', 'V'),
    'Energy': ('XOM',),
}
SECTORS = {
    ticker: sector
    for sector, tickers in _SECTOR_MEMBERS.items()
    for ticker in tickers
}

# The preprocessor and the OFI calculator are built with the same n_levels
# value (presumably the order-book depth - TODO confirm against the scripts).
_N_LEVELS = 5
preprocessor = DataPreprocessor(n_levels=_N_LEVELS)
ofi_calc = OFICalculator(n_levels=_N_LEVELS)
impact_analyzer = CrossImpactAnalyzer(alpha=0.01)

In [None]:
# Both processing steps are run at the same sampling frequency.
_BAR_FREQ = '1min'

# Step 1: read the raw data for every ticker in STOCKS. The call returns two
# objects; the second one is the input of the OFI step below.
print("Loading and processing data...")
returns_df, processed_data = preprocessor.process_multiple_stocks(
    data_dir='../data', stocks=STOCKS, freq=_BAR_FREQ
)

# Step 2: derive the integrated and the multi-level OFIs from the processed data.
print("Computing OFIs...")
integrated_ofis_df, multi_level_ofis = ofi_calc.process_multiple_stocks(
    processed_data, freq=_BAR_FREQ
)

In [8]:
def create_multi_level_ofi_heatmap():
    """Save one correlation heatmap of the multi-level OFIs per stock.

    For every ticker in the notebook-level ``STOCKS`` list, the correlation
    matrix of ``multi_level_ofis[stock]`` is drawn in its own panel. All
    panels share one figure, which is written to
    ``../results/correlations/multi_level_ofi_correlations.png`` and then
    closed. Nothing is returned.

    The panel grid is derived from ``len(STOCKS)`` (five panels per row), so
    the function keeps working when tickers are added or removed; for the
    default ten tickers this is the same 2x5 layout and 15x5 inch figure as
    before.
    """
    n_cols = 5
    # Ceiling division without importing math; at least one row so that an
    # empty STOCKS list still yields a valid (blank) figure.
    n_rows = max(1, -(-len(STOCKS) // n_cols))

    fig, axes = plt.subplots(
        n_rows, n_cols,
        figsize=(3 * n_cols, 2.5 * n_rows),
        squeeze=False,  # always a 2-D array, even for a single row
    )
    panels = axes.ravel()

    for ax, stock in zip(panels, STOCKS):
        corr = multi_level_ofis[stock].corr()
        # Correlations live in [-1, 1]; fixing the colour scale and centring
        # it on zero makes colours comparable across panels and consistent
        # with the other heatmaps in this notebook.
        sns.heatmap(corr, annot=True, fmt='.2f', cmap='RdBu_r',
                    vmin=-1, vmax=1, center=0, ax=ax)
        ax.set_title(f'{stock} Multi-Level OFI Correlations')

    # Hide grid cells that have no stock assigned to them.
    for ax in panels[len(STOCKS):]:
        ax.set_visible(False)

    fig.tight_layout()
    fig.savefig('../results/correlations/multi_level_ofi_correlations.png')
    plt.close(fig)

In [5]:
def create_contemporaneous_impact_plots(n_levels=5):
    """Fit the contemporaneous impact models and save all related figures.

    Calls ``impact_analyzer.compute_contemporaneous_impact`` with the
    notebook-level ``returns_df``, ``multi_level_ofis`` and
    ``integrated_ofis_df`` and writes the following files:

    * ``../results/impact_analysis/contemporaneous_impact_analysis.png``:
      a 2x2 overview (integrated-OFI coefficients, R² per model, average
      coefficient per level, sector-averaged coefficients).
    * ``../results/multi_level_impacts/multi_level_impact_<stock>.png``:
      one level-by-stock coefficient heatmap per ticker in ``STOCKS``.
    * ``../results/correlations/level_correlation_structure.png``:
      correlation between level-wise coefficient slices, averaged over stocks.

    Parameters
    ----------
    n_levels : int, default 5
        Stride used to split each ``CI_ML`` coefficient vector into levels:
        level ``k`` is read as ``coefficients[k::n_levels]``. This assumes the
        vector holds ``n_levels`` consecutive entries per source stock, in
        ``STOCKS`` order - TODO confirm against ``CrossImpactAnalyzer``.

    Returns
    -------
    None
    """
    results = impact_analyzer.compute_contemporaneous_impact(
        returns=returns_df,
        multi_level_ofis=multi_level_ofis,
        integrated_ofis=integrated_ofis_df
    )

    def level_slice(stock, level):
        """Return every n_levels-th CI_ML coefficient of `stock`, starting at `level`."""
        return results[stock]['CI_ML']['coefficients'][level::n_levels]

    level_names = [f'Level {level}' for level in range(n_levels)]
    # Sorted so that the sector heatmap has the same layout on every run
    # (iteration order of a set of strings is not stable across kernels).
    sectors = sorted(set(SECTORS.values()))

    fig, axes = plt.subplots(2, 2, figsize=(20, 15))
    (ax_coef, ax_r2), (ax_level, ax_sector) = axes

    # 1. Integrated OFI cross-impact heatmap.
    # Rows are looked up by ticker so that they always match the STOCKS
    # labels, whatever order the results dict happens to be in.
    integrated_coefs = pd.DataFrame(
        [results[stock]['CI_I']['coefficients'] for stock in STOCKS],
        index=STOCKS,
        columns=STOCKS
    )
    sns.heatmap(integrated_coefs, annot=True, fmt='.2f', cmap='RdBu_r',
                center=0, square=True, ax=ax_coef)
    ax_coef.set_title('Integrated OFI Cross-Impact Coefficients')

    # 2. Model performance comparison (R² of each model, per stock).
    model_names = ['PI_ML', 'PI_I', 'CI_ML', 'CI_I']
    r2_df = pd.DataFrame(
        {model: [results[stock][model]['r2'] for stock in STOCKS]
         for model in model_names},
        index=STOCKS
    )
    r2_df.plot(kind='bar', ax=ax_r2)
    ax_r2.set_title('Model Performance Comparison (R²)')
    ax_r2.set_xticklabels(STOCKS, rotation=45)
    ax_r2.legend(title='Model Type')

    # 3. Multi-level coefficient structure: mean coefficient per level.
    level_df = pd.DataFrame(
        {
            name: [level_slice(stock, level).mean() for stock in STOCKS]
            for level, name in enumerate(level_names)
        },
        index=STOCKS
    )
    sns.heatmap(level_df, annot=True, fmt='.2f', cmap='RdBu_r',
                center=0, ax=ax_level)
    ax_level.set_title('Average Impact by Order Book Level')

    # 4. Sector-aggregated impact.
    # NOTE(review): the within-sector cells also average in each stock's
    # coefficient on its own OFI - confirm that is intended.
    sector_impact = pd.DataFrame(index=sectors, columns=sectors, dtype=float)
    members = {
        sector: [s for s, sec in SECTORS.items() if sec == sector]
        for sector in sectors
    }
    for from_sector in sectors:
        for to_sector in sectors:
            impacts = [
                results[to_stock]['CI_I']['coefficients'][from_stock]
                for to_stock in members[to_sector]
                for from_stock in members[from_sector]
            ]
            sector_impact.loc[from_sector, to_sector] = np.mean(impacts)

    sns.heatmap(sector_impact, annot=True, fmt='.2f', cmap='RdBu_r',
                center=0, ax=ax_sector)
    ax_sector.set_title('Sector-Level Cross-Impact')

    fig.tight_layout()
    fig.savefig('../results/impact_analysis/contemporaneous_impact_analysis.png',
                bbox_inches='tight', dpi=300)
    plt.close(fig)

    # 5. Level-specific cross-impact, one figure per stock.
    for stock in STOCKS:
        level_impacts = np.zeros((n_levels, len(STOCKS)))
        for level in range(n_levels):
            level_impacts[level] = level_slice(stock, level)

        stock_fig, stock_ax = plt.subplots(figsize=(15, 10))
        sns.heatmap(level_impacts, annot=True, fmt='.2f', cmap='RdBu_r',
                    center=0,
                    xticklabels=STOCKS,
                    yticklabels=level_names,
                    ax=stock_ax)
        stock_ax.set_title(f'Multi-Level Cross-Impact for {stock}')
        stock_fig.tight_layout()
        stock_fig.savefig(
            f'../results/multi_level_impacts/multi_level_impact_{stock}.png',
            bbox_inches='tight', dpi=300)
        # Close inside the loop so figures do not pile up in memory.
        plt.close(stock_fig)

    # 6. Cross-level correlation structure, averaged across stocks.
    level_labels = [f'L{i}' for i in range(n_levels)]
    # Cells with no usable per-stock correlation stay at 0.0.
    level_corr_values = np.zeros((n_levels, n_levels))
    for i in range(n_levels):
        for j in range(n_levels):
            corrs = []
            for stock in STOCKS:
                coeffs_i = level_slice(stock, i)
                coeffs_j = level_slice(stock, j)
                if len(coeffs_i) == 0 or len(coeffs_j) == 0:
                    continue
                # A constant slice has no defined correlation; skip it.
                if np.std(coeffs_i) > 0 and np.std(coeffs_j) > 0:
                    corr = np.corrcoef(coeffs_i, coeffs_j)[0, 1]
                    if not np.isnan(corr):
                        corrs.append(corr)
            if corrs:
                level_corr_values[i, j] = np.mean(corrs)

    level_corr = pd.DataFrame(level_corr_values,
                              index=level_labels, columns=level_labels)

    # The matrix never contains NaN (missing cells are 0.0), so the figure is
    # always drawn - and always closed, which the earlier guarded version did
    # not guarantee.
    corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(level_corr, annot=True, fmt='.2f', cmap='RdBu_r',
                square=True, ax=corr_ax)
    corr_ax.set_title('Average Cross-Level Correlation Structure')
    corr_fig.tight_layout()
    corr_fig.savefig('../results/correlations/level_correlation_structure.png',
                     bbox_inches='tight', dpi=300)
    plt.close(corr_fig)

In [6]:
def create_predictive_impact_plots():
    """Fit the predictive impact models and save the related figures.

    Calls ``impact_analyzer.compute_predictive_impact`` with the
    notebook-level ``returns_df``, ``multi_level_ofis`` and
    ``integrated_ofis_df``. The result is indexed as
    ``pred_results[stock][horizon][model]`` and each entry is read through
    its ``'r2'`` and ``'coefficients'`` keys.

    Files written:

    * ``../results/impact_analysis/predictive_impact_analysis.png``:
      R² by horizon per model, ``FCI_I`` coefficient heatmap and R² boxplots
      at the reference horizon, and R² by horizon per sector.
    * ``../results/impact_analysis/cross_impact_evolution.png``:
      ``FCI_I`` coefficient heatmaps at horizons 1, 5 and 30 (only those that
      are present in the results).

    The reference horizon is 1 when available, otherwise the shortest
    horizon found.

    Raises
    ------
    ValueError
        If the analyzer returned no horizons for the first stock.
    """
    pred_results = impact_analyzer.compute_predictive_impact(
        returns=returns_df,
        multi_level_ofis=multi_level_ofis,
        integrated_ofis=integrated_ofis_df
    )

    # Take the horizon list from the first configured ticker rather than a
    # hard-coded symbol, so the function survives changes to STOCKS.
    horizons = sorted(pred_results[STOCKS[0]].keys())
    if not horizons:
        raise ValueError('compute_predictive_impact returned no horizons')
    base_horizon = 1 if 1 in horizons else horizons[0]

    models = ['FPI_ML', 'FPI_I', 'FCI_ML', 'FCI_I']
    colors = ['blue', 'green', 'red', 'purple']
    # Sorted so that legend order and line colours are the same on every run
    # (iteration order of a set of strings is not stable across kernels).
    sectors = sorted(set(SECTORS.values()))

    def coef_frame(horizon):
        """Return the FCI_I coefficients at `horizon` as a STOCKS x STOCKS frame."""
        return pd.DataFrame(
            [pred_results[stock][horizon]['FCI_I']['coefficients']
             for stock in STOCKS],
            index=STOCKS,
            columns=STOCKS
        )

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    (ax_decay, ax_coef), (ax_box, ax_sector) = axes

    # Plot 1: R² decay comparison (mean over stocks, per model).
    for model, color in zip(models, colors):
        mean_r2 = [
            np.mean([pred_results[stock][h][model]['r2'] for stock in STOCKS])
            for h in horizons
        ]
        ax_decay.plot(horizons, mean_r2, marker='o', label=model, color=color)
    ax_decay.set_xlabel('Prediction Horizon (minutes)')
    ax_decay.set_ylabel('Average R²')
    ax_decay.set_title('Predictive Power Decay by Model')
    ax_decay.legend()
    ax_decay.grid(True)

    # Plot 2: coefficient heatmap at the reference horizon.
    sns.heatmap(coef_frame(base_horizon), cmap='RdBu_r', center=0,
                annot=True, fmt='.2f', ax=ax_coef)
    ax_coef.set_title(f'{base_horizon}-Minute Ahead Cross-Impact Coefficients')

    # Plot 3: model comparison boxplots at the reference horizon.
    # Tick labels are set separately instead of through boxplot's `labels`
    # keyword, which newer matplotlib releases deprecate; boxes are drawn at
    # positions 1..len(models) by default.
    ax_box.boxplot([
        [pred_results[stock][base_horizon][model]['r2'] for stock in STOCKS]
        for model in models
    ])
    ax_box.set_xticks(range(1, len(models) + 1))
    ax_box.set_xticklabels(models, rotation=45)
    ax_box.set_ylabel('R²')
    ax_box.set_title(f'{base_horizon}-Minute Ahead Model Performance Comparison')
    ax_box.grid(True)

    # Plot 4: sector-wise predictive power (mean FCI_I R² per sector).
    for sector in sectors:
        sector_r2 = [
            np.mean([
                pred_results[stock][h]['FCI_I']['r2']
                for stock in STOCKS
                if SECTORS[stock] == sector
            ])
            for h in horizons
        ]
        ax_sector.plot(horizons, sector_r2, marker='o', label=sector)
    ax_sector.set_xlabel('Prediction Horizon (minutes)')
    ax_sector.set_ylabel('Average R²')
    ax_sector.set_title('Predictive Power Decay by Sector')
    ax_sector.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax_sector.grid(True)

    fig.tight_layout()
    fig.savefig('../results/impact_analysis/predictive_impact_analysis.png',
                bbox_inches='tight', dpi=300)
    plt.close(fig)

    # Plot 5: cross-impact evolution at a few key horizons.
    requested_horizons = [1, 5, 30]
    evolution_horizons = [h for h in requested_horizons if h in horizons]
    missing = [h for h in requested_horizons if h not in horizons]
    if missing:
        # Report instead of failing with a KeyError on an absent horizon.
        print(f'Skipping horizons not present in the results: {missing}')

    if evolution_horizons:
        n_panels = len(evolution_horizons)
        # 4 inches per panel reproduces the 12x8 figure for three horizons.
        evo_fig, evo_axes = plt.subplots(1, n_panels,
                                         figsize=(4 * n_panels, 8),
                                         squeeze=False)
        for ax, horizon in zip(evo_axes.ravel(), evolution_horizons):
            sns.heatmap(coef_frame(horizon), cmap='RdBu_r', center=0,
                        annot=True, fmt='.2f', ax=ax)
            ax.set_title(f'{horizon}-Minute Ahead Cross-Impact')

        evo_fig.tight_layout()
        evo_fig.savefig('../results/impact_analysis/cross_impact_evolution.png',
                        bbox_inches='tight', dpi=300)
        plt.close(evo_fig)

In [None]:
# Run every figure builder in turn; each one writes its own files to disk.
print("Creating visualizations...")
for build_figures in (
    create_multi_level_ofi_heatmap,
    create_contemporaneous_impact_plots,
    create_predictive_impact_plots,
):
    build_figures()
print("All visualizations saved to ../results")