In [1]:
import ast
from datetime import timedelta

import pandas as pd

In [2]:
# Read the CSV named unichain_uniswap_v2_blocks.csv into `df` and display it as the cell output.
# NOTE(review): absolute, machine-specific path, and a different file from the
# UNICHAIN_CSV (block_data.csv) that the pipeline further down reads — confirm both are intended.
df = pd.read_csv("/home/tobias/personal-dex-trading/out/data/unichain_uniswap_v2_blocks.csv")
df

Unnamed: 0,token0_amounts,token1_outputs,token1_inputs,timestamp
0,"[0.0001, 0.001, 0.01]","[{'raw': 0, 'amount': 0.0}, {'raw': 0, 'amount...","[{'raw': 100, 'amount': 1e-16}, {'raw': 1000, ...",1753888087
1,"[0.0001, 0.001, 0.01]","[{'raw': 0, 'amount': 0.0}, {'raw': 0, 'amount...","[{'raw': 100, 'amount': 1e-16}, {'raw': 1000, ...",1753888088
2,"[0.0001, 0.001, 0.01]","[{'raw': 0, 'amount': 0.0}, {'raw': 0, 'amount...","[{'raw': 100, 'amount': 1e-16}, {'raw': 1000, ...",1753888089
3,"[0.0001, 0.001, 0.01]","[{'raw': 0, 'amount': 0.0}, {'raw': 0, 'amount...","[{'raw': 100, 'amount': 1e-16}, {'raw': 1000, ...",1753888090
4,"[0.0001, 0.001, 0.01]","[{'raw': 0, 'amount': 0.0}, {'raw': 0, 'amount...","[{'raw': 100, 'amount': 1e-16}, {'raw': 1000, ...",1753888091
5,"[0.0001, 0.001, 0.01]","[{'raw': 0, 'amount': 0.0}, {'raw': 0, 'amount...","[{'raw': 100, 'amount': 1e-16}, {'raw': 1000, ...",1753888092
6,"[0.0001, 0.001, 0.01]","[{'raw': 0, 'amount': 0.0}, {'raw': 0, 'amount...","[{'raw': 100, 'amount': 1e-16}, {'raw': 1000, ...",1753888093
7,"[0.0001, 0.001, 0.01]","[{'raw': 0, 'amount': 0.0}, {'raw': 0, 'amount...","[{'raw': 100, 'amount': 1e-16}, {'raw': 1000, ...",1753888094
8,"[0.0001, 0.001, 0.01]","[{'raw': 0, 'amount': 0.0}, {'raw': 0, 'amount...","[{'raw': 100, 'amount': 1e-16}, {'raw': 1000, ...",1753888095
9,"[0.0001, 0.001, 0.01]","[{'raw': 0, 'amount': 0.0}, {'raw': 0, 'amount...","[{'raw': 100, 'amount': 1e-16}, {'raw': 1000, ...",1753888096


In [42]:
# Input CSV locations: main() passes BINANCE_CSV to load_binance() and
# UNICHAIN_CSV to load_unichain().
# NOTE(review): both are absolute paths on one machine — consider deriving them
# from a configurable data directory so the notebook runs elsewhere.
BINANCE_CSV   = "/home/tobias/personal-dex-trading/out/data/order_book.csv"
UNICHAIN_CSV  = "/home/tobias/personal-dex-trading/out/data/block_data.csv"

def _top_price(side_str: str) -> float | None:
    """Return the first price in a stringified list of bids/asks."""
    try:
        side = ast.literal_eval(side_str)
        return float(side[0][0]) if side else None
    except Exception:
        return None

def load_binance(path: str) -> pd.DataFrame:
    """Read the Binance order-book CSV and reduce it to top-of-book prices.

    The file is expected to provide ``time``, ``bids`` and ``asks`` columns,
    where ``bids`` / ``asks`` hold stringified lists that ``_top_price`` can
    parse.  Rows whose side cannot be parsed get ``None`` for that price.

    Returns a frame with columns ``time``, ``best_bid``, ``best_ask`` sorted
    by ``time`` (``time`` is left exactly as read from the CSV).
    """
    book = pd.read_csv(path)  # expected columns: time, bids, asks

    # Derive one "best" column per book side from its first quoted level.
    for source_col, target_col in (("bids", "best_bid"), ("asks", "best_ask")):
        book[target_col] = book[source_col].apply(_top_price)

    top_of_book = book[["time", "best_bid", "best_ask"]]
    return top_of_book.sort_values("time")

def load_unichain(path: str, tz_offset_hours: float = 2) -> pd.DataFrame:
    """Load Unichain swap quotes and derive one price column per input size.

    The CSV must contain ``timestamp`` (Unix seconds), ``input_value`` and
    ``output_value`` columns.  Rows are pivoted so that every distinct
    ``input_value`` becomes its own column holding the matching
    ``output_value``; if several rows share the same (timestamp, input size)
    pair, ``pivot_table`` averages them (its default ``aggfunc`` is the mean).

    Parameters
    ----------
    path:
        Location of the CSV (anything ``pd.read_csv`` accepts).
    tz_offset_hours:
        Hours added to the parsed timestamps so they line up with the clock
        used by the Binance data.  Defaults to 2, the shift that used to be
        hard-coded here; pass 0 to keep the timestamps as parsed.

    Returns
    -------
    pd.DataFrame
        A ``time`` column, one column per input size (labelled by the size
        itself) and one ``uni_px_<size>`` column per size holding
        ``output_value / input_value``.
    """
    df = pd.read_csv(path, index_col="timestamp")
    # seconds -> datetime, then shift onto the Binance clock
    df.index = pd.to_datetime(df.index, unit="s") + pd.Timedelta(hours=tz_offset_hours)

    # Pivot so each input size has its own column of output_value
    df_pivot = (
        df.reset_index()
          .pivot_table(index="timestamp",
                       columns="input_value",
                       values="output_value")
          .rename_axis(None, axis=1)   # drop the "input_value" label on the columns axis
          .sort_index()
    )

    # Per-unit price for each quoted size: output received / input size
    # (per the original note: USDC per 1 WETH for a WETH→USDC quote).
    price_cols = {
        f"uni_px_{in_size}": df_pivot[in_size] / in_size
        for in_size in df_pivot.columns
    }

    df_px = pd.concat(price_cols, axis=1)
    return pd.concat([df_pivot, df_px], axis=1).reset_index(names="time")

def merge_dfs(df_bin: pd.DataFrame, df_uni: pd.DataFrame) -> pd.DataFrame:
    """Left-join Unichain quotes onto the Binance rows by exact ``time``.

    Both ``time`` columns are converted with ``pd.to_datetime`` before the
    join.  When the Unichain frame has several rows for the same ``time``
    only the first is kept, so the result has exactly one row per Binance
    row; Binance rows without a matching Unichain timestamp get NaN in the
    Unichain columns.

    The inputs are not modified: the datetime conversion happens on copies,
    so a caller's frames keep their original ``time`` dtype.

    Parameters
    ----------
    df_bin:
        Binance frame with a ``time`` column (left side of the join).
    df_uni:
        Unichain frame with a ``time`` column (right side of the join).

    Returns
    -------
    pd.DataFrame
        ``df_bin``'s rows with ``df_uni``'s columns attached.
    """
    # .assign returns new frames, leaving the caller's objects untouched.
    bin_ts = df_bin.assign(time=pd.to_datetime(df_bin["time"]))
    uni_ts = df_uni.assign(time=pd.to_datetime(df_uni["time"]))

    # One Unichain row per timestamp keeps the left join from duplicating Binance rows.
    uni_unique = uni_ts.drop_duplicates(subset=["time"], keep="first")

    return pd.merge(bin_ts, uni_unique, on="time", how="left")

def add_arbitrage_cols(df: pd.DataFrame,
                       uni_price_col: str = "uni_px_0.0001") -> pd.DataFrame:
    """Add two per-row price-gap columns comparing Binance with Unichain.

    A. ``edge_binance→uni`` = Unichain price - ``best_ask``
         buy on Binance at the best ask, immediately sell on Unichain.
    B. ``edge_uni→binance`` = ``best_bid`` - Unichain price
         buy on Unichain and sell on Binance at the best bid.  Only the
         forward Unichain quote is available, so the same Unichain price is
         used as a rough stand-in for the reverse swap (not perfect – use a
         real reverse quote if available).

    A positive value means the Unichain/Binance price gap favours that
    direction; both columns are plain price differences with nothing else
    (fees, gas, ...) subtracted.

    Parameters
    ----------
    df:
        Frame containing ``best_bid``, ``best_ask`` and ``uni_price_col``.
        It is modified in place (the two columns are written onto it).
    uni_price_col:
        Name of the Unichain price column to compare against.  Defaults to
        ``"uni_px_0.0001"``, the smallest quoted size, taken as representative.

    Returns
    -------
    pd.DataFrame
        The same ``df`` object, for call chaining.

    Raises
    ------
    KeyError
        If ``uni_price_col``, ``best_bid`` or ``best_ask`` is missing.
    """
    uni_price = df[uni_price_col]

    df["edge_binance→uni"] = uni_price - df["best_ask"]   # positive ⇒ profit
    df["edge_uni→binance"] = df["best_bid"] - uni_price   # positive ⇒ profit
    return df

def main():
    """Run the full pipeline and return the merged analysis frame.

    Loads the Binance top-of-book data from ``BINANCE_CSV`` and the Unichain
    quotes from ``UNICHAIN_CSV``, joins them on ``time`` and appends the two
    arbitrage-edge columns.  Nothing is written to disk.
    """
    binance_quotes = load_binance(BINANCE_CSV)
    unichain_quotes = load_unichain(UNICHAIN_CSV)

    # To persist the result, call
    # .to_csv("merged_for_analysis.csv", index=False) on the returned frame.
    return add_arbitrage_cols(merge_dfs(binance_quotes, unichain_quotes))

In [45]:
# Run the pipeline defined above and show the merged frame as the cell output.
merged = main()
merged

Unnamed: 0,time,best_bid,best_ask,0.0001,0.001,0.01,uni_px_0.0001,uni_px_0.001,uni_px_0.01,edge_binance→uni,edge_uni→binance
0,2025-07-29 16:22:46,3808.20,3808.21,,,,,,,,
1,2025-07-29 16:22:47,3808.10,3808.11,0.379535,3.793543,37.755463,3795.35,3793.543,3775.5463,-12.76,12.75
2,2025-07-29 16:22:48,3807.84,3807.85,0.379535,3.793543,37.755463,3795.35,3793.543,3775.5463,-12.50,12.49
3,2025-07-29 16:22:49,3807.78,3807.79,0.379535,3.793543,37.755463,3795.35,3793.543,3775.5463,-12.44,12.43
4,2025-07-29 16:22:50,3807.79,3807.80,0.379535,3.793543,37.755463,3795.35,3793.543,3775.5463,-12.45,12.44
...,...,...,...,...,...,...,...,...,...,...,...
559,2025-07-29 16:32:05,3818.92,3818.93,0.380312,3.801312,37.832594,3803.12,3801.312,3783.2594,-15.81,15.80
560,2025-07-29 16:32:06,3818.92,3818.93,0.380312,3.801312,37.832594,3803.12,3801.312,3783.2594,-15.81,15.80
561,2025-07-29 16:32:07,3818.92,3818.93,0.380312,3.801312,37.832594,3803.12,3801.312,3783.2594,-15.81,15.80
562,2025-07-29 16:32:08,3819.01,3819.02,0.380312,3.801312,37.832594,3803.12,3801.312,3783.2594,-15.90,15.89
