# Spread Orders & Trades Analysis Notebook

This notebook loads and analyzes both spread orders (bid/ask) and spread trades (buy/sell executions) from the SpreadViewer integration script.


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
from datetime import datetime
from scipy import stats  # For Q-Q plots and statistical analysis
# Suppress every warning category so library notices do not clutter cell output
warnings.simplefilter('ignore')

# Display configuration: never truncate columns, cap printed rows at 20
pd.options.display.max_columns = None
pd.options.display.max_rows = 20

# Apply the seaborn style sheet first, then set the default figure size
# (the style sheet is applied before the size so the size below is what sticks)
plt.style.use('seaborn-v0_8')
plt.rc('figure', figsize=(15, 8))

print("üìä M1M2 Spread Orders & Trades Analysis")
print("=" * 50)

üìä M1M2 Spread Orders & Trades Analysis


## 1. Load Spread Orders and Trades Data

In [2]:
# Path to the generated parquet files
# NOTE(review): this is a machine-specific absolute path; anyone else running
# the notebook has to edit it before this cell can find the data.
data_path = r'C:\Users\krajcovic\Documents\Testing Data\ATS_data\test\parquet_files'
orders_file = 'DEM1-DEM2_orders_robust.parquet'  # Using robust versions
trades_file = 'DEM1-DEM2_trades_robust.parquet'
metadata_file = 'DEM1-DEM2_metadata_robust.parquet'

orders_path = Path(data_path) / orders_file
trades_path = Path(data_path) / trades_file
metadata_path = Path(data_path) / metadata_file

# CSV backups sit next to the parquet files under the same stem, so derive
# their names instead of spelling them out a second time.
orders_csv = orders_path.with_suffix('.csv')
trades_csv = trades_path.with_suffix('.csv')

print(f"üìÅ Data directory: {data_path}")
print(f"üìä Orders file exists: {orders_path.exists()}")
print(f"üíπ Trades file exists: {trades_path.exists()}")
print(f"üìã Metadata file exists: {metadata_path.exists()}")

# Load the data. `data_loaded` gates every later cell in the notebook.
data_loaded = False
parquet_available = orders_path.exists() and trades_path.exists()

if parquet_available:
    try:
        # Load spread orders (bid/ask market data) - use PyArrow for robust files
        orders_df = pd.read_parquet(orders_path, engine='pyarrow')
        print(f"‚úÖ Orders data loaded: {orders_df.shape}")

        # Load spread trades (execution data) - use PyArrow for robust files
        trades_df = pd.read_parquet(trades_path, engine='pyarrow')
        print(f"‚úÖ Trades data loaded: {trades_df.shape}")

        data_loaded = True

    except Exception as e:
        # Best-effort load: report the problem and fall through to the CSV backups.
        print(f"‚ùå Failed to load robust parquet files: {e}")
        print("üîÑ Trying CSV backups...")

    # Metadata is optional, so it is read in its own try block: a broken
    # metadata file must not discard orders/trades that loaded successfully.
    if data_loaded and metadata_path.exists():
        try:
            metadata_df = pd.read_parquet(metadata_path, engine='pyarrow')
            print(f"‚úÖ Metadata loaded: {metadata_df.shape}")
            display(metadata_df)
        except Exception as metadata_error:
            print(f"WARNING: metadata could not be loaded (continuing without it): {metadata_error}")

if not data_loaded:
    # Fallback to CSV files. This is attempted both when the parquet read
    # failed and when the parquet files are missing altogether.
    try:
        if orders_csv.exists() and trades_csv.exists():
            orders_df = pd.read_csv(orders_csv, index_col=0, parse_dates=True)
            trades_df = pd.read_csv(trades_csv, index_col=0, parse_dates=True)
            print(f"‚úÖ Loaded from CSV backups: Orders {orders_df.shape}, Trades {trades_df.shape}")
            data_loaded = True
        elif parquet_available:
            print("‚ùå No CSV backups found")
        else:
            print("‚ùå Data files not found. Run the integration script first.")
    except Exception as e2:
        print(f"‚ùå CSV backup failed: {e2}")

if data_loaded:
    print(f"\nüìä Data Summary:")
    print(f"   Orders period: {orders_df.index.min()} to {orders_df.index.max()}")
    print(f"   Trades period: {trades_df.index.min()} to {trades_df.index.max()}")
    print(f"   Orders columns: {list(orders_df.columns)}")
    print(f"   Trades columns: {list(trades_df.columns)}")

üìÅ Data directory: C:\Users\krajcovic\Documents\Testing Data\ATS_data\test\parquet_files
üìä Orders file exists: True
üíπ Trades file exists: True
üìã Metadata file exists: True
‚ùå Failed to load robust parquet files: Repetition level histogram size mismatch
üîÑ Trying CSV backups...
‚úÖ Loaded from CSV backups: Orders (1000, 2), Trades (1000, 2)

üìä Data Summary:
   Orders period: 2025-07-01 09:00:03.210000 to 2025-07-01 09:19:22.529000
   Trades period: 2025-07-01 09:00:17 to 2025-07-01 14:07:02
   Orders columns: ['bid', 'ask']
   Trades columns: ['buy', 'sell']


## 2. Basic Data Overview

In [3]:
if data_loaded:
    # Each entry: (frame, section heading, caption for head(), caption for describe()).
    # Orders and trades get the same report layout, so it is driven from one table.
    overview_sections = (
        (orders_df, "üìä ORDERS DATA OVERVIEW",
         "\nüìà First 10 orders:", "\nüìà Orders Statistical Summary:"),
        (trades_df, "üíπ TRADES DATA OVERVIEW",
         "\nüíπ First 10 trades:", "\nüíπ Trades Statistical Summary:"),
    )

    for section_no, (frame, heading, head_caption, stats_caption) in enumerate(overview_sections):
        # A wide rule separates the second section from the first
        if section_no > 0:
            print("\n" + "=" * 50)

        print(heading)
        print("=" * 30)

        footprint_mb = frame.memory_usage(deep=True).sum() / 1024**2
        first_stamp, last_stamp = frame.index.min(), frame.index.max()
        print(f"Shape: {frame.shape}")
        print(f"Memory usage: {footprint_mb:.2f} MB")
        print(f"Date range: {first_stamp} to {last_stamp}")

        print(head_caption)
        display(frame.head(10))

        print(stats_caption)
        display(frame.describe())

üìä ORDERS DATA OVERVIEW
Shape: (1000, 2)
Memory usage: 0.02 MB
Date range: 2025-07-01 09:00:03.210000 to 2025-07-01 09:19:22.529000

üìà First 10 orders:


Unnamed: 0_level_0,bid,ask
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-07-01 09:00:03.210,-8.45,-8.3
2025-07-01 09:00:06.343,-8.47,-8.3
2025-07-01 09:00:07.315,-8.48,-8.3
2025-07-01 09:00:12.878,-8.47,-8.3
2025-07-01 09:00:14.096,-8.48,-8.3
2025-07-01 09:00:17.872,-8.47,-8.3
2025-07-01 09:00:21.185,-8.46,-8.3
2025-07-01 09:00:21.291,-8.46,-8.27
2025-07-01 09:00:21.558,-8.46,-8.26
2025-07-01 09:00:21.749,-8.45,-8.26



üìà Orders Statistical Summary:


Unnamed: 0,bid,ask
count,1000.0,1000.0
mean,-8.22721,-8.13964
std,0.115041,0.100954
min,-8.48,-8.44
25%,-8.32,-8.23
50%,-8.18,-8.1
75%,-8.14,-8.08
max,-8.04,-7.95



üíπ TRADES DATA OVERVIEW
Shape: (1000, 2)
Memory usage: 0.02 MB
Date range: 2025-07-01 09:00:17 to 2025-07-01 14:07:02

üíπ First 10 trades:


Unnamed: 0,buy,sell
2025-07-01 09:00:17,,-8.48
2025-07-01 09:00:36,,-8.45
2025-07-01 09:00:37,,-8.43
2025-07-01 09:00:52,,-8.44
2025-07-01 09:00:56,,-8.43
2025-07-01 09:01:09,,-8.44
2025-07-01 09:01:12,-8.285,-8.45
2025-07-01 09:01:17,-8.23,
2025-07-01 09:01:18,-8.23,
2025-07-01 09:01:19,,-8.4



üíπ Trades Statistical Summary:


Unnamed: 0,buy,sell
count,583.0,594.0
mean,-8.093187,-8.154796
std,0.164711,0.159831
min,-8.53,-8.59
25%,-8.22,-8.259
50%,-8.05,-8.12
75%,-7.99,-8.05
max,-7.74,-7.83


## 3. Calculate Additional Metrics

In [None]:
def _signed_return(prices):
    """Relative period-over-period change whose sign follows the price move.

    Spread prices in this dataset are negative, and ``Series.pct_change``
    divides by the previous price itself, so a rising negative price would be
    reported as a negative return. Dividing by the absolute previous price
    keeps the magnitude and fixes the sign; for strictly positive prices the
    result is identical to ``pct_change``.

    Missing prices are forward-filled before differencing, so a row without a
    price contributes a zero return instead of breaking the series.

    Parameters
    ----------
    prices : pandas.Series
        Price series in the order the returns should be computed.

    Returns
    -------
    pandas.Series
        Same index as ``prices``; the first element is NaN.
    """
    filled = prices.ffill()
    return filled.diff() / filled.shift().abs()


if data_loaded:
    # Enhance orders data
    orders_df['mid'] = (orders_df['bid'] + orders_df['ask']) / 2
    orders_df['spread_width'] = orders_df['ask'] - orders_df['bid']
    orders_df['mid_return'] = _signed_return(orders_df['mid'])
    orders_df['date'] = orders_df.index.date
    orders_df['hour'] = orders_df.index.hour
    orders_df['minute'] = orders_df.index.minute

    # Enhance trades data
    # NOTE: a row with both 'buy' and 'sell' populated is priced at the buy
    # value and labelled 'BUY'; its sell leg is ignored by everything that
    # uses 'trade_price' / 'trade_direction' downstream.
    trades_df['trade_price'] = trades_df['buy'].fillna(trades_df['sell'])
    trades_df['trade_direction'] = np.where(trades_df['buy'].notna(), 'BUY',
                                           np.where(trades_df['sell'].notna(), 'SELL', 'UNKNOWN'))
    trades_df['trade_return'] = _signed_return(trades_df['trade_price'])
    trades_df['date'] = trades_df.index.date
    trades_df['hour'] = trades_df.index.hour
    trades_df['minute'] = trades_df.index.minute

    # Calculate time between trades (seconds since the previous row's timestamp)
    trades_df['time_since_last_trade'] = trades_df.index.to_series().diff().dt.total_seconds()

    print("‚úÖ Enhanced metrics calculated:")
    print(f"\nüìä Orders Summary:")
    print(f"   Mid price range: {orders_df['mid'].min():.3f} to {orders_df['mid'].max():.3f}")
    print(f"   Average spread width: {orders_df['spread_width'].mean():.3f}")
    print(f"   Mid price volatility: {orders_df['mid_return'].std():.6f}")

    print(f"\nüíπ Trades Summary:")
    print(f"   Trade price range: {trades_df['trade_price'].min():.3f} to {trades_df['trade_price'].max():.3f}")
    print(f"   Buy trades: {(trades_df['trade_direction'] == 'BUY').sum()}")
    print(f"   Sell trades: {(trades_df['trade_direction'] == 'SELL').sum()}")
    print(f"   Avg time between trades: {trades_df['time_since_last_trade'].mean():.1f} seconds")
    print(f"   Trade price volatility: {trades_df['trade_return'].std():.6f}")

    print(f"\nüîç Sample enhanced data:")
    display(orders_df[['bid', 'ask', 'mid', 'spread_width']].head())
    display(trades_df[['buy', 'sell', 'trade_price', 'trade_direction']].head())

## 4. Orders vs Trades Time Series Visualization

In [None]:
if data_loaded:
    # Four stacked panels: quotes with executions, spread width, cumulative
    # returns, and trade frequency.
    fig, axes = plt.subplots(4, 1, figsize=(18, 16))

    # Plot 1: Orders (Bid/Ask/Mid) with Trade Executions
    axes[0].plot(orders_df.index, orders_df['bid'], label='Bid', alpha=0.6, linewidth=0.8, color='red')
    axes[0].plot(orders_df.index, orders_df['ask'], label='Ask', alpha=0.6, linewidth=0.8, color='blue')
    axes[0].plot(orders_df.index, orders_df['mid'], label='Mid', alpha=0.8, linewidth=1, color='purple')

    # Overlay trade executions (these two frames are reused by later cells)
    buy_trades = trades_df[trades_df['trade_direction'] == 'BUY']
    sell_trades = trades_df[trades_df['trade_direction'] == 'SELL']

    if not buy_trades.empty:
        axes[0].scatter(buy_trades.index, buy_trades['trade_price'],
                       color='green', marker='^', s=30, alpha=0.8, label=f'Buy Trades ({len(buy_trades)})')

    if not sell_trades.empty:
        axes[0].scatter(sell_trades.index, sell_trades['trade_price'],
                       color='orange', marker='v', s=30, alpha=0.8, label=f'Sell Trades ({len(sell_trades)})')

    axes[0].set_title('M1M2 Spread: Orders (Bid/Ask/Mid) with Trade Executions')
    axes[0].set_ylabel('Price')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Plot 2: Spread Width vs Trade Activity
    axes[1].plot(orders_df.index, orders_df['spread_width'], color='orange', alpha=0.7, linewidth=0.8, label='Spread Width')

    # Add trade timestamps as vertical lines
    for trade_time in trades_df.index[::50]:  # Every 50th trade to avoid clutter
        axes[1].axvline(x=trade_time, color='red', alpha=0.3, linewidth=0.5)

    axes[1].set_title('Bid-Ask Spread Width with Trade Timing')
    axes[1].set_ylabel('Spread Width')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)

    # Plot 3: Price Returns Comparison (running sum of per-row returns)
    orders_returns = orders_df['mid_return'].dropna()
    trade_returns = trades_df['trade_return'].dropna()

    axes[2].plot(orders_returns.index, orders_returns.cumsum(),
                 label='Orders Cumulative Return', alpha=0.8, linewidth=1.5, color='blue')

    if not trade_returns.empty:
        axes[2].plot(trade_returns.index, trade_returns.cumsum(),
                     label='Trades Cumulative Return', alpha=0.8, linewidth=1.5, color='red')

    axes[2].set_title('Cumulative Returns: Orders vs Trades')
    axes[2].set_ylabel('Cumulative Return')
    axes[2].legend()
    axes[2].grid(True, alpha=0.3)

    # Plot 4: Trade Frequency Over Time
    # Resample trades into 10-minute bins. The 'min' alias is used because
    # pandas has deprecated the older 'T' spelling.
    trade_frequency = trades_df.resample('10min').size()
    # Bars are 8 minutes wide so neighbouring 10-minute bins stay visually separated
    axes[3].bar(trade_frequency.index, trade_frequency.values,
                width=pd.Timedelta(minutes=8), alpha=0.7, color='green')
    axes[3].set_title('Trade Frequency (10-minute intervals)')
    axes[3].set_ylabel('Number of Trades')
    axes[3].set_xlabel('Time')
    axes[3].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 5. Trade Execution Analysis

In [None]:
if data_loaded:
    # Trade direction analysis
    trade_direction_counts = trades_df['trade_direction'].value_counts()

    # Recomputed here so this cell does not depend on the plotting cell
    # having been executed first.
    buy_trades = trades_df[trades_df['trade_direction'] == 'BUY']
    sell_trades = trades_df[trades_df['trade_direction'] == 'SELL']

    fig, axes = plt.subplots(2, 3, figsize=(20, 12))

    # Trade direction pie chart
    axes[0,0].pie(trade_direction_counts.values, labels=trade_direction_counts.index,
                  autopct='%1.1f%%', startangle=90)
    axes[0,0].set_title('Trade Direction Distribution')

    # Trade price distribution
    axes[0,1].hist(trades_df['trade_price'].dropna(), bins=50, alpha=0.7,
                   color='skyblue', edgecolor='black')
    axes[0,1].set_title('Trade Price Distribution')
    axes[0,1].set_xlabel('Trade Price')
    axes[0,1].set_ylabel('Frequency')
    axes[0,1].grid(True, alpha=0.3)

    # Time between trades distribution
    time_between = trades_df['time_since_last_trade'].dropna()
    axes[0,2].hist(time_between[time_between < 3600], bins=50, alpha=0.7,  # Filter < 1 hour
                   color='orange', edgecolor='black')
    axes[0,2].set_title('Time Between Trades (< 1 hour)')
    axes[0,2].set_xlabel('Seconds')
    axes[0,2].set_ylabel('Frequency')
    axes[0,2].grid(True, alpha=0.3)

    # Hourly trade activity (hourly_trades is reused by the summary report cell)
    hourly_trades = trades_df.groupby('hour').size()
    axes[1,0].bar(hourly_trades.index, hourly_trades.values, alpha=0.7, color='green')
    axes[1,0].set_title('Trade Activity by Hour')
    axes[1,0].set_xlabel('Hour')
    axes[1,0].set_ylabel('Number of Trades')
    axes[1,0].grid(True, alpha=0.3)

    # Trade returns distribution
    trade_returns_clean = trades_df['trade_return'].dropna()
    if not trade_returns_clean.empty:
        axes[1,1].hist(trade_returns_clean, bins=50, alpha=0.7,
                       color='purple', edgecolor='black')
        axes[1,1].set_title('Trade Returns Distribution')
        axes[1,1].set_xlabel('Return')
        axes[1,1].set_ylabel('Frequency')
        axes[1,1].grid(True, alpha=0.3)

    # Buy vs Sell price comparison
    if not buy_trades.empty and not sell_trades.empty:
        axes[1,2].boxplot([buy_trades['trade_price'].dropna(), sell_trades['trade_price'].dropna()])
        # Tick labels are set on the axis rather than through boxplot's
        # `labels=` keyword, which newer Matplotlib releases deprecate.
        axes[1,2].set_xticklabels(['Buy Trades', 'Sell Trades'])
        axes[1,2].set_title('Buy vs Sell Trade Prices')
        axes[1,2].set_ylabel('Trade Price')
        axes[1,2].grid(True, alpha=0.3)
    else:
        axes[1,2].text(0.5, 0.5, 'Insufficient buy/sell\ntrade data',
                       transform=axes[1,2].transAxes, ha='center', va='center')
        axes[1,2].set_title('Buy vs Sell Trade Prices')

    plt.tight_layout()
    plt.show()

    # Print trade statistics
    print("üìä Trade Execution Statistics:")
    print(f"   Total trades: {len(trades_df):,}")
    print(f"   Buy trades: {len(buy_trades):,} ({len(buy_trades)/len(trades_df)*100:.1f}%)")
    print(f"   Sell trades: {len(sell_trades):,} ({len(sell_trades)/len(trades_df)*100:.1f}%)")
    print(f"   Average trade price: {trades_df['trade_price'].mean():.4f}")
    print(f"   Trade price std: {trades_df['trade_price'].std():.4f}")
    print(f"   Average time between trades: {time_between.mean():.1f} seconds")
    print(f"   Median time between trades: {time_between.median():.1f} seconds")

## 6. Orders vs Trades Price Impact Analysis

In [None]:
if data_loaded:
    # Align orders and trades data for comparison
    print("üîç Analyzing price impact of trades on order book...")

    # Look at most 5 seconds either side of each trade for an order snapshot.
    # Hoisted out of the loop because it never changes.
    time_window = pd.Timedelta(seconds=5)

    # For each trade, find the closest order book snapshot before and after it
    trade_impacts = []

    for trade_time, trade_row in trades_df.iterrows():
        # Orders in the window strictly before the trade
        before_orders = orders_df[
            (orders_df.index >= trade_time - time_window) &
            (orders_df.index < trade_time)
        ]

        # Orders in the window strictly after the trade
        after_orders = orders_df[
            (orders_df.index > trade_time) &
            (orders_df.index <= trade_time + time_window)
        ]

        # A trade only contributes when it has a snapshot on both sides
        if not before_orders.empty and not after_orders.empty:
            # NOTE: taking the last/first row assumes orders_df is in time order
            before_mid = before_orders['mid'].iloc[-1]  # Last before trade
            after_mid = after_orders['mid'].iloc[0]     # First after trade

            impact = after_mid - before_mid

            trade_impacts.append({
                'trade_time': trade_time,
                'trade_price': trade_row['trade_price'],
                'trade_direction': trade_row['trade_direction'],
                'before_mid': before_mid,
                'after_mid': after_mid,
                'price_impact': impact
            })

    # Always rebuild impact_df, even when empty, so later cells that test
    # `impact_df.empty` never pick up a stale frame from an earlier run.
    impact_df = pd.DataFrame(trade_impacts)

    if not impact_df.empty:
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))

        # Price impact distribution
        axes[0,0].hist(impact_df['price_impact'], bins=50, alpha=0.7,
                       color='red', edgecolor='black')
        axes[0,0].axvline(x=0, color='black', linestyle='--', alpha=0.8)
        axes[0,0].set_title('Trade Price Impact Distribution')
        axes[0,0].set_xlabel('Price Impact')
        axes[0,0].set_ylabel('Frequency')
        axes[0,0].grid(True, alpha=0.3)

        # Price impact by trade direction
        buy_impacts = impact_df[impact_df['trade_direction'] == 'BUY']['price_impact']
        sell_impacts = impact_df[impact_df['trade_direction'] == 'SELL']['price_impact']

        if not buy_impacts.empty and not sell_impacts.empty:
            axes[0,1].boxplot([buy_impacts, sell_impacts])
            # Tick labels are set on the axis rather than through boxplot's
            # `labels=` keyword, which newer Matplotlib releases deprecate.
            axes[0,1].set_xticklabels(['Buy Impact', 'Sell Impact'])
            axes[0,1].axhline(y=0, color='black', linestyle='--', alpha=0.8)
            axes[0,1].set_title('Price Impact by Trade Direction')
            axes[0,1].set_ylabel('Price Impact')
            axes[0,1].grid(True, alpha=0.3)

        # Trade price vs order book mid; the dashed line is y = x
        axes[1,0].scatter(impact_df['before_mid'], impact_df['trade_price'],
                          alpha=0.6, s=20, color='blue')
        axes[1,0].plot([impact_df['before_mid'].min(), impact_df['before_mid'].max()],
                       [impact_df['before_mid'].min(), impact_df['before_mid'].max()],
                       'r--', alpha=0.8, label='Perfect correlation')
        axes[1,0].set_xlabel('Order Book Mid (before trade)')
        axes[1,0].set_ylabel('Trade Price')
        axes[1,0].set_title('Trade Price vs Order Book Mid')
        axes[1,0].legend()
        axes[1,0].grid(True, alpha=0.3)

        # Price impact over time
        axes[1,1].scatter(impact_df['trade_time'], impact_df['price_impact'],
                          alpha=0.6, s=20, color='green')
        axes[1,1].axhline(y=0, color='black', linestyle='--', alpha=0.8)
        axes[1,1].set_xlabel('Time')
        axes[1,1].set_ylabel('Price Impact')
        axes[1,1].set_title('Price Impact Over Time')
        axes[1,1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        print(f"\nüìä Price Impact Analysis ({len(impact_df)} trade impacts analyzed):")
        print(f"   Average price impact: {impact_df['price_impact'].mean():.6f}")
        print(f"   Price impact std: {impact_df['price_impact'].std():.6f}")
        print(f"   Positive impacts: {(impact_df['price_impact'] > 0).sum()} ({(impact_df['price_impact'] > 0).sum()/len(impact_df)*100:.1f}%)")
        print(f"   Negative impacts: {(impact_df['price_impact'] < 0).sum()} ({(impact_df['price_impact'] < 0).sum()/len(impact_df)*100:.1f}%)")

        if not buy_impacts.empty:
            print(f"   Buy trade impact: {buy_impacts.mean():.6f} ¬± {buy_impacts.std():.6f}")
        if not sell_impacts.empty:
            print(f"   Sell trade impact: {sell_impacts.mean():.6f} ¬± {sell_impacts.std():.6f}")
    else:
        print("‚ö†Ô∏è  No trade impacts could be calculated (insufficient order/trade alignment)")

## 7. Daily Analysis: Orders vs Trades

In [None]:
if data_loaded:
    # Daily statistics for orders
    daily_orders = orders_df.groupby('date').agg({
        'mid': ['min', 'max', 'mean', 'std', 'first', 'last', 'count'],
        'spread_width': ['mean', 'std']
    }).round(4)

    daily_orders.columns = ['Mid_Min', 'Mid_Max', 'Mid_Mean', 'Mid_Std', 'Mid_Open',
                           'Mid_Close', 'Order_Count', 'SpreadWidth_Mean', 'SpreadWidth_Std']
    # Divide by the absolute open: spread prices can be negative, and a plain
    # close/open - 1 would flip the sign of the return in that case.
    daily_orders['Daily_Return'] = (
        (daily_orders['Mid_Close'] - daily_orders['Mid_Open']) / daily_orders['Mid_Open'].abs()
    ).round(6)

    # Daily statistics for trades
    daily_trades = trades_df.groupby('date').agg({
        'trade_price': ['min', 'max', 'mean', 'std', 'first', 'last', 'count'],
        'time_since_last_trade': ['mean', 'median']
    }).round(4)

    daily_trades.columns = ['Trade_Min', 'Trade_Max', 'Trade_Mean', 'Trade_Std', 'Trade_First',
                           'Trade_Last', 'Trade_Count', 'AvgTimeBetween', 'MedianTimeBetween']
    # Same sign-safe return definition as for the orders above
    daily_trades['Trade_Daily_Return'] = (
        (daily_trades['Trade_Last'] - daily_trades['Trade_First']) / daily_trades['Trade_First'].abs()
    ).round(6)

    # Combine daily stats. The outer join leaves NaN counts on days present in
    # only one of the two frames, so both count columns are filled with 0.
    daily_combined = daily_orders.join(daily_trades, how='outer')
    daily_combined['Trade_Count'] = daily_combined['Trade_Count'].fillna(0).astype(int)
    daily_combined['Order_Count'] = daily_combined['Order_Count'].fillna(0).astype(int)

    print("üìÖ Daily Orders Statistics:")
    display(daily_orders)

    print("\nüíπ Daily Trades Statistics:")
    display(daily_trades)

    # Visualization
    fig, axes = plt.subplots(2, 2, figsize=(18, 12))

    # Daily orders vs trades count (paired bars, offset around each integer position)
    x_pos = range(len(daily_combined))
    axes[0,0].bar([x - 0.2 for x in x_pos], daily_combined['Order_Count'],
                  width=0.4, label='Orders', alpha=0.7, color='blue')
    axes[0,0].bar([x + 0.2 for x in x_pos], daily_combined['Trade_Count'],
                  width=0.4, label='Trades', alpha=0.7, color='red')
    axes[0,0].set_xticks(x_pos)
    axes[0,0].set_xticklabels([d.strftime('%m-%d') for d in daily_combined.index], rotation=45)
    axes[0,0].set_title('Daily Orders vs Trades Count')
    axes[0,0].set_ylabel('Count')
    axes[0,0].legend()
    axes[0,0].grid(True, alpha=0.3)

    # Daily returns comparison. Marker and line style are given once as
    # keywords; repeating the marker in a format string makes Matplotlib warn.
    axes[0,1].plot(daily_combined.index, daily_combined['Daily_Return'] * 100,
                   linestyle='-', marker='o', markersize=8, label='Orders Return')
    axes[0,1].plot(daily_combined.index, daily_combined['Trade_Daily_Return'] * 100,
                   linestyle='-', marker='s', markersize=8, label='Trades Return')
    axes[0,1].axhline(y=0, color='black', linestyle='--', alpha=0.5)
    axes[0,1].set_title('Daily Returns: Orders vs Trades')
    axes[0,1].set_ylabel('Return (%)')
    axes[0,1].legend()
    axes[0,1].grid(True, alpha=0.3)

    # Daily volatility comparison
    axes[1,0].bar([x - 0.2 for x in x_pos], daily_combined['Mid_Std'],
                  width=0.4, label='Orders Volatility', alpha=0.7, color='purple')
    axes[1,0].bar([x + 0.2 for x in x_pos], daily_combined['Trade_Std'],
                  width=0.4, label='Trades Volatility', alpha=0.7, color='orange')
    axes[1,0].set_xticks(x_pos)
    axes[1,0].set_xticklabels([d.strftime('%m-%d') for d in daily_combined.index], rotation=45)
    axes[1,0].set_title('Daily Volatility: Orders vs Trades')
    axes[1,0].set_ylabel('Standard Deviation')
    axes[1,0].legend()
    axes[1,0].grid(True, alpha=0.3)

    # Trade frequency analysis. Days without orders give NaN rather than an
    # infinite ratio, so they are skipped in the plot and in the mean below.
    trade_efficiency = daily_combined['Trade_Count'] / daily_combined['Order_Count'].replace(0, np.nan) * 100
    axes[1,1].bar(x_pos, trade_efficiency, alpha=0.7, color='green')
    axes[1,1].set_xticks(x_pos)
    axes[1,1].set_xticklabels([d.strftime('%m-%d') for d in daily_combined.index], rotation=45)
    axes[1,1].set_title('Trade Efficiency (Trades/Orders %)')
    axes[1,1].set_ylabel('Percentage')
    axes[1,1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print(f"\nüìä Daily Summary:")
    print(f"   Total days analyzed: {len(daily_combined)}")
    print(f"   Average orders per day: {daily_combined['Order_Count'].mean():.0f}")
    print(f"   Average trades per day: {daily_combined['Trade_Count'].mean():.0f}")
    print(f"   Average trade efficiency: {trade_efficiency.mean():.2f}%")
    print(f"   Best trading day: {daily_combined['Trade_Count'].idxmax()} ({daily_combined['Trade_Count'].max()} trades)")
    print(f"   Most volatile day (orders): {daily_combined['Mid_Std'].idxmax()} (œÉ={daily_combined['Mid_Std'].max():.4f})")

## 8. Export Enhanced Data

In [None]:
if data_loaded:
    # Exports are written into the same directory the inputs were read from
    output_path = Path(data_path)

    # --- Orders: quotes plus the derived mid / spread / return columns ---
    orders_export_columns = ['bid', 'ask', 'mid', 'spread_width', 'mid_return']
    orders_enhanced = orders_df.loc[:, orders_export_columns].copy()
    orders_enhanced_path = output_path.joinpath('DEM1-DEM2_orders_enhanced.parquet')
    orders_enhanced.to_parquet(orders_enhanced_path)
    print(f"üíæ Enhanced orders saved: {orders_enhanced_path}")

    # --- Trades: executions plus the derived price / direction / timing columns ---
    trades_export_columns = ['buy', 'sell', 'trade_price', 'trade_direction',
                             'trade_return', 'time_since_last_trade']
    trades_enhanced = trades_df.loc[:, trades_export_columns].copy()
    trades_enhanced_path = output_path.joinpath('DEM1-DEM2_trades_enhanced.parquet')
    trades_enhanced.to_parquet(trades_enhanced_path)
    print(f"üíæ Enhanced trades saved: {trades_enhanced_path}")

    # --- Per-day summary table ---
    daily_analysis_path = output_path.joinpath('DEM1-DEM2_daily_analysis.csv')
    daily_combined.to_csv(daily_analysis_path)
    print(f"üìÖ Daily analysis saved: {daily_analysis_path}")

    # --- Trade impacts: written only if the impact cell produced a non-empty frame ---
    impact_available = 'impact_df' in locals() and not impact_df.empty
    if impact_available:
        impact_analysis_path = output_path.joinpath('DEM1-DEM2_trade_impacts.csv')
        impact_df.to_csv(impact_analysis_path)
        print(f"üéØ Trade impact analysis saved: {impact_analysis_path}")

    print(f"\n‚úÖ All enhanced data exported to: {output_path}")

## 9. Comprehensive Summary Report

In [None]:
if not data_loaded:
    print("‚ùå No data loaded. Please run the cells above to load the spread data first.")
else:
    # The report reads frames and aggregates built by the earlier cells:
    # orders_df, trades_df, buy_trades, sell_trades, hourly_trades, daily_combined.
    report_rule = "=" * 70
    print("üìã M1M2 SPREAD ORDERS & TRADES ANALYSIS SUMMARY")
    print(report_rule)

    # Dataset overview
    print("üìä DATASET OVERVIEW:")
    period_start = orders_df.index.min().strftime('%Y-%m-%d')
    period_end = orders_df.index.max().strftime('%Y-%m-%d')
    n_orders = len(orders_df)
    n_trades = len(trades_df)
    print(f"   üìÖ Analysis period: {period_start} to {period_end}")
    print(f"   üìä Total orders: {n_orders:,} points")
    print(f"   üíπ Total trades: {n_trades:,} executions")
    print(f"   üéØ Trade-to-order ratio: {n_trades/n_orders*100:.2f}%")

    # Price analysis
    print("\nüí∞ PRICE ANALYSIS:")
    mid_series = orders_df['mid']
    trade_price_series = trades_df['trade_price']
    print(f"   üìà Order mid range: {mid_series.min():.4f} to {mid_series.max():.4f}")
    print(f"   üíπ Trade price range: {trade_price_series.min():.4f} to {trade_price_series.max():.4f}")
    print(f"   üìè Average spread width: {orders_df['spread_width'].mean():.4f}")
    print(f"   üìä Order volatility: {orders_df['mid_return'].std():.6f}")
    print(f"   üíπ Trade volatility: {trades_df['trade_return'].std():.6f}")

    # Trading activity
    print("\n‚ö° TRADING ACTIVITY:")
    n_buys = len(buy_trades)
    print(f"   üü¢ Buy trades: {n_buys:,} ({n_buys/n_trades*100:.1f}%)")
    n_sells = len(sell_trades)
    print(f"   üî¥ Sell trades: {n_sells:,} ({n_sells/n_trades*100:.1f}%)")
    print(f"   ‚è±Ô∏è  Average time between trades: {trades_df['time_since_last_trade'].mean():.1f} seconds")
    print(f"   üïê Most active hour: {hourly_trades.idxmax()}:00 ({hourly_trades.max()} trades)")

    # Daily performance
    print("\nüìÖ DAILY PERFORMANCE:")
    order_counts = daily_combined['Order_Count']
    trade_counts = daily_combined['Trade_Count']
    day_returns = daily_combined['Daily_Return']
    print(f"   üìä Average orders/day: {order_counts.mean():.0f}")
    print(f"   üíπ Average trades/day: {trade_counts.mean():.0f}")
    print(f"   üöÄ Best trading day: {trade_counts.idxmax()} ({trade_counts.max()} trades)")
    print(f"   üìà Best return day: {day_returns.idxmax()} ({day_returns.max()*100:.3f}%)")
    print(f"   üìâ Worst return day: {day_returns.idxmin()} ({day_returns.min()*100:.3f}%)")

    # Trade impact (only when the impact cell left a non-empty frame behind)
    if 'impact_df' in locals() and not impact_df.empty:
        print("\nüéØ TRADE IMPACT ANALYSIS:")
        n_impacts = len(impact_df)
        print(f"   üìä Impacts analyzed: {n_impacts}")
        print(f"   üìà Average impact: {impact_df['price_impact'].mean():.6f}")
        n_positive = (impact_df['price_impact'] > 0).sum()
        print(f"   ‚¨ÜÔ∏è  Positive impacts: {n_positive} ({n_positive/n_impacts*100:.1f}%)")
        n_negative = (impact_df['price_impact'] < 0).sum()
        print(f"   ‚¨áÔ∏è  Negative impacts: {n_negative} ({n_negative/n_impacts*100:.1f}%)")

    print("\n‚úÖ ANALYSIS COMPLETED SUCCESSFULLY!")
    print("üóÇÔ∏è  All data and visualizations generated for M1M2 spread analysis")
    print(report_rule)