In [None]:
%load_ext autoreload
%autoreload 2

In [13]:
# notebooks/ofi_analysis.ipynb


# Resampling interval forwarded to aggregate_order_book_data and to the
# plotting helpers below; an empty string skips the aggregation step.
interval = "300s"
# Directory handed to the plotting helpers as their save_path.
results_dir = "../results"

# Import necessary libraries
import pandas as pd
import numpy as np
import sys
import os

# Add the scripts directory to the Python path
sys.path.append(os.path.abspath(os.path.join("..", "scripts")))

# Import scripts
from ofi_calculation import calculate_order_flows, aggregate_order_book_data
from pca_integration import integrate_ofi_with_pca
from returns import calculate_log_returns, plot_log_return_time_series
from cross_impact import contemporaneous_cross_impact, preprocess_data
from visualization import plot_ofi_time_series, plot_ofi_distribution, plot_ofi_heatmap, plot_ofi_boxplot, plot_heatmap, compare_self_vs_cross_impact

# Load order book data
order_book_data = pd.read_csv("../data/order_book_trades_15min.csv")

# Drop the deeper book levels (05 through 09). The column names are generated
# rather than typed out so the first dropped level and the `levels` argument
# given to the aggregation step below always come from the same number.
n_levels = 5
deepest_level = 9
book_fields = ("bid_px", "ask_px", "bid_sz", "ask_sz", "bid_ct", "ask_ct")
columns_to_drop = [
    f"{field}_{level:02d}"
    for level in range(n_levels, deepest_level + 1)
    for field in book_fields
]
order_book_data = order_book_data.drop(columns=columns_to_drop)

# Parse timestamps and strip any timezone information so they can be compared
# against the timezone-naive cutoff below.
order_book_data["ts_event"] = pd.to_datetime(order_book_data["ts_event"]).dt.tz_localize(None)

# Keep only events strictly before the cutoff
cutoff_time = pd.to_datetime("2025-01-06 10:31:00")
filtered_data = order_book_data[order_book_data["ts_event"] < cutoff_time]

# Aggregate only when an interval is configured. The truthiness test treats
# None the same as the empty string, so either value skips this step.
if interval:
    filtered_data = aggregate_order_book_data(
        filtered_data, levels=n_levels, interval=interval
    )

# Sort by symbol and timestamp, then discard rows with missing values
filtered_data = filtered_data.sort_values(by=["symbol", "ts_event"]).dropna()

# Step 1: Calculate OFI
filtered_data = calculate_order_flows(filtered_data)

# Step 2: Integrate OFI using PCA
filtered_data = integrate_ofi_with_pca(filtered_data)

# Step 3: Calculate logarithmic returns
log_returns = calculate_log_returns(filtered_data)

# Step 4: Merge OFI and returns data, keeping only the columns used downstream
merged_data = pd.merge(filtered_data, log_returns, on=["symbol", "ts_event"])
columns_to_keep = ['symbol', 'ts_event', 'bid_px_00', 'ofi_pca', 'log_return', 'mid_price']
merged_data = merged_data[columns_to_keep]
merged_data = preprocess_data(merged_data)

# Every plotting helper below receives results_dir as its save_path; create
# the directory up front so a fresh checkout does not rely on the helpers
# doing so themselves.
os.makedirs(results_dir, exist_ok=True)

# Step 5: Visualize log returns
plot_log_return_time_series(data=merged_data, interval=interval, save_path=results_dir)

# Step 6: Analyze contemporaneous cross-impact
cross_impact_coef = contemporaneous_cross_impact(merged_data)

# Step 7: Visualize OFI metrics (all four helpers are called with the same
# arguments, in the original order)
for plot_ofi in (
    plot_ofi_time_series,
    plot_ofi_distribution,
    plot_ofi_heatmap,
    plot_ofi_boxplot,
):
    plot_ofi(merged_data, interval=interval, save_path=results_dir)

# Step 8: Visualize cross-impact results
plot_heatmap(
    cross_impact_coef,
    title=f"Contemporaneous Cross-Impact Coefficients ({interval})",
    interval=interval,
    save_path=results_dir,
)

compare_self_vs_cross_impact(
    cross_impact_coef,
    title=f"Self-Impact vs. Average Cross-Impact ({interval})",
    interval=interval,
    save_path=results_dir,
)

2025-01-04 23:08:14,709 - INFO - Aggregating order book data by 1s...
2025-01-04 23:08:14,867 - INFO - Order book data aggregation completed successfully.
2025-01-04 23:08:14,871 - INFO - Calculating order flows...
2025-01-04 23:08:14,872 - INFO - Processing symbol: AAPL
2025-01-04 23:08:14,882 - INFO - Processing symbol: AMGN
2025-01-04 23:08:14,891 - INFO - Processing symbol: GILD
2025-01-04 23:08:14,901 - INFO - Processing symbol: JPM
2025-01-04 23:08:14,910 - INFO - Processing symbol: MSFT
2025-01-04 23:08:14,919 - INFO - Processing symbol: NVDA
2025-01-04 23:08:14,928 - INFO - Processing symbol: PEP
2025-01-04 23:08:14,937 - INFO - Processing symbol: TSLA
2025-01-04 23:08:14,947 - INFO - Processing symbol: V
2025-01-04 23:08:14,956 - INFO - Processing symbol: XOM
2025-01-04 23:08:14,967 - INFO - Order flow calculation completed.
2025-01-04 23:08:14,968 - INFO - Integrating OFI using PCA...
2025-01-04 23:08:14,970 - INFO - OFI integration using PCA completed successfully.
2025-01-0