In [None]:
"""import pandas as pd
import time
import os
import numpy as np

# CONFIGURATION
# Path of the CSV that the polling loop below re-reads whenever it grows.
CLEAN_CSV_FILE = "fidelity_orderbook_clean.csv"
# Target seconds per polling iteration (the loop sleeps for the remainder).
POLL_INTERVAL = 1.0
# Upper bound on the rows kept in imbalance_df; older rows are trimmed off.
MAX_POINTS = 10000
# Row offset used by the OFI columns: each row is compared with the row
# OFI_LOOKBACK positions before it.
OFI_LOOKBACK = 8

print(f"Starting depth imbalance + normalized OFI monitor (polling every {POLL_INTERVAL}s)")
print(f"Source: {CLEAN_CSV_FILE}")
print(f"OFI lookback: {OFI_LOOKBACK} rows, MAX_POINTS: {MAX_POINTS}")

# Accumulated rows plus the metric columns computed by the polling loop.
# It starts empty and is replaced/extended as fresh timestamps are found.
imbalance_df = pd.DataFrame(columns=[
    'timestamp', 'depth_imbalance', 'ask_bid_walk',
    'OFI_bid', 'OFI_ask', 'flow_bid', 'flow_ask',
    'bid_price', 'ask_price', 'bid_size', 'ask_size',
    'total_bid_notional', 'total_ask_notional', 'top3_bid_notional', 'top3_ask_notional'
])
# Size in bytes of the CSV at the last successfully processed poll.
last_file_size = 0

def compute_notional_totals(row):
    """Return (bid, ask) notional summed over levels 1-10 of a single row.

    A level adds ``price * size`` to its side only when both the price and
    the size coerce to a number; anything else is skipped for that side.
    """
    bid_notional = 0
    ask_notional = 0

    for depth in range(1, 11):
        bid_px = pd.to_numeric(row.get(f'bid_{depth}'), errors='coerce')
        bid_qty = pd.to_numeric(row.get(f'size_b_{depth}'), errors='coerce')
        ask_px = pd.to_numeric(row.get(f'ask_{depth}'), errors='coerce')
        ask_qty = pd.to_numeric(row.get(f'size_a_{depth}'), errors='coerce')

        if pd.notna(bid_px) and pd.notna(bid_qty):
            bid_notional += bid_px * bid_qty
        if pd.notna(ask_px) and pd.notna(ask_qty):
            ask_notional += ask_px * ask_qty

    return bid_notional, ask_notional

def compute_top3_notional(row):
    """Return (bid, ask) notional summed over the first three book levels.

    Used as the normalization base; a level is counted for a side only when
    both its price and its size coerce to a number.
    """
    bid_notional = 0
    ask_notional = 0

    for depth in (1, 2, 3):
        bid_px = pd.to_numeric(row.get(f'bid_{depth}'), errors='coerce')
        bid_qty = pd.to_numeric(row.get(f'size_b_{depth}'), errors='coerce')
        ask_px = pd.to_numeric(row.get(f'ask_{depth}'), errors='coerce')
        ask_qty = pd.to_numeric(row.get(f'size_a_{depth}'), errors='coerce')

        if pd.notna(bid_px) and pd.notna(bid_qty):
            bid_notional += bid_px * bid_qty
        if pd.notna(ask_px) and pd.notna(ask_qty):
            ask_notional += ask_px * ask_qty

    return bid_notional, ask_notional

# Polling loop: re-read the clean CSV whenever it grows, append rows whose
# timestamps are not yet stored, recompute every metric, and print the latest
# row.  Runs until interrupted from the keyboard.
print("Waiting for clean CSV data...")

try:
    while True:
        start_time = time.time()

        if os.path.exists(CLEAN_CSV_FILE):
            current_file_size = os.path.getsize(CLEAN_CSV_FILE)

            # Only a larger file triggers a re-read.
            if current_file_size > last_file_size:
                try:
                    df_clean = pd.read_csv(CLEAN_CSV_FILE)
                    print(f"Loaded {len(df_clean)} rows from clean CSV")

                    # *** FIXED: Process ALL rows, not just tail(10) ***
                    new_rows = df_clean  # ← ALL ROWS!
                    new_raw_rows = []

                    # Keep rows whose level-1 bid and ask prices are numeric;
                    # sizes are coerced but not checked, so they may be NaN.
                    for _, row in new_rows.iterrows():
                        bid_price = pd.to_numeric(row.get('bid_1'), errors='coerce')
                        ask_price = pd.to_numeric(row.get('ask_1'), errors='coerce')
                        bid_size = pd.to_numeric(row.get('size_b_1'), errors='coerce')
                        ask_size = pd.to_numeric(row.get('size_a_1'), errors='coerce')
                        
                        if pd.isna(bid_price) or pd.isna(ask_price):
                            continue
                            
                        # Carry every bid_/ask_/size_ column along so the
                        # notional helpers can read all levels later.
                        new_raw_rows.append({
                            'timestamp': row['timestamp'],
                            'bid_price': bid_price, 'ask_price': ask_price,
                            'bid_size': bid_size, 'ask_size': ask_size,
                            **{col: row[col] for col in row.index if col.startswith(('bid_', 'ask_', 'size_'))}
                        })

                    # Add new raw data to imbalance_df
                    if new_raw_rows:
                        new_df = pd.DataFrame(new_raw_rows)
                        # De-duplicate against stored rows by timestamp only.
                        existing_timestamps = set(imbalance_df['timestamp'])
                        fresh_rows = new_df[~new_df['timestamp'].isin(existing_timestamps)]

                        print(f"Found {len(fresh_rows)} NEW rows to add (total will be {len(imbalance_df) + len(fresh_rows)})")

                        if not fresh_rows.empty:
                            if imbalance_df.empty:
                                imbalance_df = fresh_rows.copy()
                            else:
                                imbalance_df = pd.concat([imbalance_df, fresh_rows], ignore_index=True)

                            # Trim to MAX_POINTS
                            if len(imbalance_df) > MAX_POINTS:
                                imbalance_df = imbalance_df.tail(MAX_POINTS).reset_index(drop=True)

                            # RECOMPUTE ALL METRICS ON FULL MAX_POINTS DATA
                            # Rows are read by position (iloc) and written by
                            # label (loc), so the index must be 0..n-1 here.
                            print(f"\n--- RECOMPUTING METRICS ON {len(imbalance_df)} rows ---")
                            for i in range(len(imbalance_df)):
                                row = imbalance_df.iloc[i]
                                
                                total_bid_n, total_ask_n = compute_notional_totals(row)
                                top3_bid_n, top3_ask_n = compute_top3_notional(row)
                                
                                imbalance_df.loc[i, 'total_bid_notional'] = total_bid_n
                                imbalance_df.loc[i, 'total_ask_notional'] = total_ask_n
                                imbalance_df.loc[i, 'top3_bid_notional'] = top3_bid_n
                                imbalance_df.loc[i, 'top3_ask_notional'] = top3_ask_n
                                
                                # (bid - ask) / (bid + ask) over all levels; NaN when both are zero.
                                denom = total_bid_n + total_ask_n
                                depth_imbalance = (total_bid_n - total_ask_n) / denom if denom != 0 else np.nan
                                imbalance_df.loc[i, 'depth_imbalance'] = depth_imbalance
                                
                                # Top-3 ask notional relative to top-3 bid notional.
                                ask_bid_walk = top3_ask_n / top3_bid_n if top3_bid_n != 0 else np.nan
                                imbalance_df.loc[i, 'ask_bid_walk'] = ask_bid_walk
                                
                                # Change in total notional versus the row
                                # OFI_LOOKBACK positions earlier; 0 for the
                                # first OFI_LOOKBACK rows.
                                ofi_bid, ofi_ask = 0, 0
                                if i >= OFI_LOOKBACK:
                                    prior_idx = i - OFI_LOOKBACK
                                    prior_bid_n = imbalance_df.loc[prior_idx, 'total_bid_notional']
                                    prior_ask_n = imbalance_df.loc[prior_idx, 'total_ask_notional']
                                    ofi_bid = total_bid_n - prior_bid_n
                                    ofi_ask = total_ask_n - prior_ask_n
                                imbalance_df.loc[i, 'OFI_bid'] = ofi_bid
                                imbalance_df.loc[i, 'OFI_ask'] = ofi_ask
                                
                                # OFI normalized by the same side's top-3 notional.
                                flow_bid = ofi_bid / top3_bid_n if top3_bid_n != 0 else np.nan
                                flow_ask = ofi_ask / top3_ask_n if top3_ask_n != 0 else np.nan
                                imbalance_df.loc[i, 'flow_bid'] = flow_bid
                                imbalance_df.loc[i, 'flow_ask'] = flow_ask

                            # NOTE(review): int(...) raises ValueError when the
                            # latest bid_size/ask_size is NaN; the except below
                            # then reports it and last_file_size stays
                            # unchanged, so the same file is reprocessed on
                            # the next poll.
                            latest = imbalance_df.iloc[-1]
                            print(f"[{latest['timestamp']}] Imb:{latest['depth_imbalance']:.3f} W:{latest['ask_bid_walk']:.2f} "
                                  f"OFI_B:${latest['OFI_bid']:,.0f} OFI_A:${latest['OFI_ask']:,.0f} "
                                  f"F_B:{latest['flow_bid']:.3f} F_A:{latest['flow_ask']:.3f} "
                                  f"B1:${latest['bid_price']:.2f}x{int(latest['bid_size'])} "
                                  f"A1:${latest['ask_price']:.2f}x{int(latest['ask_size'])} "
                                  f"({len(imbalance_df)} rows)")

                    # Reached only when processing finished without raising.
                    last_file_size = current_file_size

                except Exception as e:
                    print(f"Processing error: {e}")

        else:
            print("Clean CSV not found yet, waiting...")

        # Sleep for whatever is left of the polling interval.
        elapsed = time.time() - start_time
        sleep_time = max(0, POLL_INTERVAL - elapsed)
        if sleep_time > 0:
            time.sleep(sleep_time)

except KeyboardInterrupt:
    # Manual stop: report the final shape and the newest rows.
    print("\nStopped by user")
    print(f"Final DataFrame shape: {imbalance_df.shape}")
    if not imbalance_df.empty:
        print("\nLatest 5 rows:")
        print(imbalance_df[['timestamp', 'depth_imbalance', 'ask_bid_walk', 'OFI_bid', 'OFI_ask', 
                           'flow_bid', 'flow_ask', 'bid_price', 'ask_price']].tail())"""


Starting depth imbalance + normalized OFI monitor (polling every 1.0s)
Source: fidelity_orderbook_clean.csv
OFI lookback: 8 rows, MAX_POINTS: 10000
Waiting for clean CSV data...
Loaded 533 rows from clean CSV
Found 532 NEW rows to add (total will be 532)

--- RECOMPUTING METRICS ON 532 rows ---
[2025-12-06 12:18:41] Imb:-0.655 W:50.11 OFI_B:$0 OFI_A:$0 F_B:0.000 F_A:0.000 B1:$454.61x1 A1:$454.64x160 (532 rows)

Stopped by user
Final DataFrame shape: (532, 59)

Latest 5 rows:
               timestamp  depth_imbalance  ask_bid_walk  OFI_bid  OFI_ask  \
527  2025-12-06 11:57:03        -0.654939     50.110598      0.0      0.0   
528  2025-12-06 12:18:17        -0.654939     50.110598      0.0      0.0   
529  2025-12-06 12:18:24        -0.654939     50.110598      0.0      0.0   
530  2025-12-06 12:18:32        -0.654939     50.110598      0.0      0.0   
531  2025-12-06 12:18:41        -0.654939     50.110598      0.0      0.0   

     flow_bid  flow_ask  bid_price  ask_price  
527      

In [None]:
import pandas as pd
import time
import os
import numpy as np

# CONFIGURATION
# Path of the CSV that the polling loop below re-reads whenever it grows.
CLEAN_CSV_FILE = "fidelity_orderbook_clean.csv"
# Target seconds per polling iteration (the loop sleeps for the remainder).
POLL_INTERVAL = 1.0
# Upper bound on the rows kept in imbalance_df; older rows are trimmed off.
MAX_POINTS = 10000
# Row offset used by the OFI columns: each row is compared with the row
# OFI_LOOKBACK positions before it.
OFI_LOOKBACK = 8
PICKLE_FILE = "imbalance_live.pkl"  # <---- Added global pickle target

print(f"Starting depth imbalance + normalized OFI monitor (polling every {POLL_INTERVAL}s)")
print(f"Source: {CLEAN_CSV_FILE}")
print(f"OFI lookback: {OFI_LOOKBACK} rows, MAX_POINTS: {MAX_POINTS}")

# Accumulated rows plus the metric columns computed by the polling loop.
# It starts empty and is replaced/extended as fresh timestamps are found.
imbalance_df = pd.DataFrame(columns=[
    'timestamp', 'depth_imbalance', 'ask_bid_walk',
    'OFI_bid', 'OFI_ask', 'flow_bid', 'flow_ask',
    'bid_price', 'ask_price', 'bid_size', 'ask_size',
    'total_bid_notional', 'total_ask_notional', 'top3_bid_notional', 'top3_ask_notional'
])
# Size in bytes of the CSV at the last successfully processed poll.
last_file_size = 0


def compute_notional_totals(row):
    """Sum ``price * size`` over book levels 1-10 for each side of one row.

    Levels whose price or size does not coerce to a number are skipped for
    that side.  Returns ``(total_bid_notional, total_ask_notional)``.
    """
    def side_total(price_prefix, size_prefix):
        # Accumulate one side, walking the levels from best to deepest.
        running = 0
        for depth in range(1, 11):
            price = pd.to_numeric(row.get(f'{price_prefix}{depth}'), errors='coerce')
            size = pd.to_numeric(row.get(f'{size_prefix}{depth}'), errors='coerce')
            if pd.notna(price) and pd.notna(size):
                running += price * size
        return running

    return side_total('bid_', 'size_b_'), side_total('ask_', 'size_a_')


def compute_top3_notional(row):
    """Sum ``price * size`` over the first three book levels for each side.

    Levels whose price or size does not coerce to a number are skipped for
    that side.  Returns ``(top3_bid_notional, top3_ask_notional)``.
    """
    def side_total(price_prefix, size_prefix):
        # Accumulate one side across levels 1, 2 and 3.
        running = 0
        for depth in range(1, 4):
            price = pd.to_numeric(row.get(f'{price_prefix}{depth}'), errors='coerce')
            size = pd.to_numeric(row.get(f'{size_prefix}{depth}'), errors='coerce')
            if pd.notna(price) and pd.notna(size):
                running += price * size
        return running

    return side_total('bid_', 'size_b_'), side_total('ask_', 'size_a_')


print("Waiting for clean CSV data...")


def _extract_book_rows(df_clean):
    """Return one dict per CSV row that has a numeric level-1 bid and ask.

    Each dict holds the timestamp, the coerced level-1 price/size of both
    sides, and every ``bid_*`` / ``ask_*`` / ``size_*`` column of the row so
    the notional helpers can read all levels later.  Level-1 sizes are
    coerced but not validated, so they may be NaN.
    """
    book_rows = []
    for _, row in df_clean.iterrows():
        bid_price = pd.to_numeric(row.get('bid_1'), errors='coerce')
        ask_price = pd.to_numeric(row.get('ask_1'), errors='coerce')
        if pd.isna(bid_price) or pd.isna(ask_price):
            continue
        book_rows.append({
            'timestamp': row['timestamp'],
            'bid_price': bid_price, 'ask_price': ask_price,
            'bid_size': pd.to_numeric(row.get('size_b_1'), errors='coerce'),
            'ask_size': pd.to_numeric(row.get('size_a_1'), errors='coerce'),
            **{col: row[col] for col in row.index if col.startswith(('bid_', 'ask_', 'size_'))}
        })
    return book_rows


def _recompute_metrics(df):
    """Rebuild every derived metric column of ``df`` in place.

    Notionals are computed row by row with the module's helpers; everything
    derived from them is then computed on whole columns and assigned once,
    instead of writing each cell through ``.loc``.  Rows are addressed by
    position, so the result does not depend on the index labels.
    """
    n_rows = len(df)
    total_bid = np.zeros(n_rows)
    total_ask = np.zeros(n_rows)
    top3_bid = np.zeros(n_rows)
    top3_ask = np.zeros(n_rows)
    for pos in range(n_rows):
        book_row = df.iloc[pos]
        total_bid[pos], total_ask[pos] = compute_notional_totals(book_row)
        top3_bid[pos], top3_ask[pos] = compute_top3_notional(book_row)

    def lagged_change(values):
        # Difference against the row OFI_LOOKBACK positions earlier; the
        # first OFI_LOOKBACK rows have no such row and stay at 0.
        change = np.zeros(n_rows)
        if n_rows > OFI_LOOKBACK:
            change[OFI_LOOKBACK:] = values[OFI_LOOKBACK:] - values[:n_rows - OFI_LOOKBACK]
        return change

    def ratio(numerator, denominator):
        # Element-wise division that yields NaN wherever the denominator is 0.
        out = np.full(n_rows, np.nan)
        np.divide(numerator, denominator, out=out, where=denominator != 0)
        return out

    ofi_bid = lagged_change(total_bid)
    ofi_ask = lagged_change(total_ask)

    df['total_bid_notional'] = total_bid
    df['total_ask_notional'] = total_ask
    df['top3_bid_notional'] = top3_bid
    df['top3_ask_notional'] = top3_ask
    df['depth_imbalance'] = ratio(total_bid - total_ask, total_bid + total_ask)
    df['ask_bid_walk'] = ratio(top3_ask, top3_bid)
    df['OFI_bid'] = ofi_bid
    df['OFI_ask'] = ofi_ask
    df['flow_bid'] = ratio(ofi_bid, top3_bid)
    df['flow_ask'] = ratio(ofi_ask, top3_ask)
    return df


def _save_snapshot(df, path):
    """Pickle ``df`` to ``path`` without ever exposing a half-written file.

    The frame is written to a sibling temp file and then moved over ``path``
    with ``os.replace``, so a reader in another kernel sees either the old
    snapshot or the new one.  The temp name keeps the original extension so
    pandas infers the same compression for both files.
    """
    root, ext = os.path.splitext(path)
    tmp_path = f"{root}.tmp{ext}"
    df.to_pickle(tmp_path)
    os.replace(tmp_path, path)


# Polling loop: re-read the clean CSV whenever its size changes, append rows
# whose timestamps are not stored yet, recompute all metrics, publish the
# pickle, and print the latest row.  Runs until interrupted from the keyboard.
try:
    while True:
        start_time = time.time()

        # Ask for the size directly instead of exists() + getsize(): the file
        # can disappear between the two calls, and that must not end the loop.
        try:
            current_file_size = os.path.getsize(CLEAN_CSV_FILE)
        except FileNotFoundError:
            current_file_size = None
            print("Clean CSV not found yet, waiting...")

        # Any size change triggers a re-read, so a truncated or rotated file
        # is picked up immediately rather than only after it outgrows its
        # previous size.  Re-reading is safe: rows are de-duplicated below.
        if current_file_size is not None and current_file_size != last_file_size:
            try:
                df_clean = pd.read_csv(CLEAN_CSV_FILE)
                print(f"Loaded {len(df_clean)} rows from clean CSV")

                new_raw_rows = _extract_book_rows(df_clean)
                if new_raw_rows:
                    new_df = pd.DataFrame(new_raw_rows)
                    # De-duplicate against stored rows by timestamp only.
                    existing_timestamps = set(imbalance_df['timestamp'])
                    fresh_rows = new_df[~new_df['timestamp'].isin(existing_timestamps)]

                    if not fresh_rows.empty:
                        if imbalance_df.empty:
                            imbalance_df = fresh_rows.copy()
                        else:
                            imbalance_df = pd.concat([imbalance_df, fresh_rows], ignore_index=True)

                        if len(imbalance_df) > MAX_POINTS:
                            imbalance_df = imbalance_df.tail(MAX_POINTS).reset_index(drop=True)

                        print(f"\n--- RECOMPUTING METRICS ON {len(imbalance_df)} rows ---")
                        _recompute_metrics(imbalance_df)

                        # Save to pickle for other kernels
                        _save_snapshot(imbalance_df, PICKLE_FILE)
                        print(f"Updated pickle: {PICKLE_FILE}")

                        latest = imbalance_df.iloc[-1]
                        print(f"[{latest['timestamp']}] Imb:{latest['depth_imbalance']:.3f} "
                              f"W:{latest['ask_bid_walk']:.2f} F_B:{latest['flow_bid']:.3f} F_A:{latest['flow_ask']:.3f}")

                # Reached only when processing finished without raising, so a
                # failed read is retried on the next poll.
                last_file_size = current_file_size

            except Exception as e:
                # Best-effort monitor: report the problem and keep polling.
                print(f"Processing error: {e}")

        elapsed = time.time() - start_time
        time.sleep(max(0, POLL_INTERVAL - elapsed))

except KeyboardInterrupt:
    print("\nStopped by user")

    if not imbalance_df.empty:
        # Save final snapshot
        _save_snapshot(imbalance_df, PICKLE_FILE)
        print(f"Saved final snapshot to {PICKLE_FILE}")

        # Print final dataframe info
        print(f"\nFinal DataFrame shape: {imbalance_df.shape}")
        print("\nLatest 10 rows of final data:")
        print(imbalance_df.tail(10).to_string(index=False))

        # Optional: print data summary
        print("\nColumn summary (non-null counts and dtypes):")
        # info() prints its report itself and returns None, so it must not be
        # wrapped in print().
        imbalance_df.info()
    else:
        print("No data captured — imbalance_df is empty.")


Starting depth imbalance + normalized OFI monitor (polling every 1.0s)
Source: fidelity_orderbook_clean.csv
OFI lookback: 8 rows, MAX_POINTS: 10000
Waiting for clean CSV data...
Loaded 1158 rows from clean CSV

--- RECOMPUTING METRICS ON 1157 rows ---
Updated pickle: imbalance_live.pkl
[2025-12-08 14:11:55] Imb:0.457 W:1.16 F_B:7.263 F_A:-0.595
