In [10]:
import ast
from datetime import timedelta

import pandas as pd

In [22]:
# Load the per-block Uniswap v2 quotes; the `timestamp` column (Unix seconds)
# becomes the index.
df_unichain = pd.read_csv(
    "/home/tobias/personal-dex-trading/out/data/20250731_124711_unichain_uniswap_v2_blocks.csv",
    index_col="timestamp",
)
# Epoch seconds -> datetimes, then shift by +2h in the same step.
# NOTE(review): the fixed two-hour offset presumably aligns these stamps with
# the Binance capture's local clock — confirm, and prefer a real tz conversion.
df_unichain.index = pd.to_datetime(df_unichain.index, unit="s") + pd.Timedelta(hours=2)
df_unichain

Unnamed: 0_level_0,token0_amounts,token1_outputs,token1_inputs
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-07-31 11:47:10,"[10000000000000, 1000000000000]","[38531, 3853]","[38765, 3877]"
2025-07-31 11:47:11,"[10000000000000, 1000000000000]","[38531, 3853]","[38765, 3877]"
2025-07-31 11:47:12,"[10000000000000, 1000000000000]","[38531, 3853]","[38765, 3877]"
2025-07-31 11:47:13,"[10000000000000, 1000000000000]","[38531, 3853]","[38765, 3877]"
2025-07-31 11:47:14,"[10000000000000, 1000000000000]","[38531, 3853]","[38765, 3877]"
...,...,...,...
2025-07-31 12:47:06,"[10000000000000, 1000000000000]","[38525, 3852]","[38759, 3876]"
2025-07-31 12:47:07,"[10000000000000, 1000000000000]","[38525, 3852]","[38759, 3876]"
2025-07-31 12:47:08,"[10000000000000, 1000000000000]","[38525, 3852]","[38759, 3876]"
2025-07-31 12:47:09,"[10000000000000, 1000000000000]","[38525, 3852]","[38759, 3876]"


In [23]:
def _top_price(side_str: str) -> float | None:
    """Return the first price in a stringified list of bids/asks."""
    try:
        side = ast.literal_eval(side_str)
        return float(side[0][0]) if side else None
    except Exception:
        return None

def load_binance(path: str) -> pd.DataFrame:
    """Read an order-book CSV and reduce it to top-of-book prices.

    Args:
        path: CSV file with (at least) the columns ``time``, ``bids`` and
            ``asks``; the latter two hold stringified lists of levels.

    Returns:
        A frame with columns ``time``, ``best_bid`` and ``best_ask``, sorted
        by ``time``. ``time`` is returned as read from the CSV (not parsed).
    """
    book = pd.read_csv(path)  # columns: time, bids, asks
    top_of_book = book.assign(
        best_bid=book["bids"].apply(_top_price),
        best_ask=book["asks"].apply(_top_price),
    )
    return top_of_book[["time", "best_bid", "best_ask"]].sort_values("time")

# Top-of-book prices from the Binance websocket capture.
binance_df = load_binance(
    "/home/tobias/personal-dex-trading/out/data/20250731_124719_binance_ws_orderbook.csv"
)
# Parse the `time` strings into datetimes so they can be joined on later.
binance_df = binance_df.assign(time=pd.to_datetime(binance_df["time"]))
binance_df

Unnamed: 0,time,best_bid,best_ask
0,2025-07-31 11:47:12,3858.37,3858.38
1,2025-07-31 11:47:13,3858.37,3858.38
2,2025-07-31 11:47:14,3858.37,3858.38
3,2025-07-31 11:47:15,3858.37,3858.38
4,2025-07-31 11:47:16,3858.37,3858.38
...,...,...,...
3595,2025-07-31 12:47:07,3863.38,3863.39
3596,2025-07-31 12:47:08,3863.38,3863.39
3597,2025-07-31 12:47:09,3863.48,3863.49
3598,2025-07-31 12:47:10,3863.48,3863.49


In [24]:
def _first_quote(cell):
    """Return element 0 of a stringified quote list, or NaN when unavailable.

    ``merge_asof`` leaves NaN in the right-hand columns of any row that has no
    earlier match. Passing that NaN to ``ast.literal_eval`` raises
    ``ValueError``, so non-string cells are mapped to NaN before parsing.
    Strings are parsed exactly as before; a parsed value that is not a list
    also yields NaN so the column stays numeric.
    """
    if not isinstance(cell, str):
        return float("nan")
    parsed = ast.literal_eval(cell)
    return parsed[0] if isinstance(parsed, list) else float("nan")


# For every Binance row, attach the most recent Unichain row at or before it.
# Both inputs must be sorted on the join key for merge_asof.
merged_df = pd.merge_asof(
    binance_df.sort_values("time"),
    df_unichain.sort_index(),
    left_on="time",
    right_index=True,
    direction="backward",
)

# First quoted size only, scaled by 1/10 to a per-unit price.
# NOTE(review): the divisor 10 presumably folds together the quote size
# (1e13 in `token0_amounts`) and the two tokens' decimals — confirm.
merged_df["bid_0.00001_weth"] = merged_df["token1_outputs"].apply(_first_quote) / 10
merged_df["ask_0.00001_weth"] = merged_df["token1_inputs"].apply(_first_quote) / 10
merged_df

Unnamed: 0,time,best_bid,best_ask,token0_amounts,token1_outputs,token1_inputs,bid_0.00001_weth,ask_0.00001_weth
0,2025-07-31 11:47:12,3858.37,3858.38,"[10000000000000, 1000000000000]","[38531, 3853]","[38765, 3877]",3853.1,3876.5
1,2025-07-31 11:47:13,3858.37,3858.38,"[10000000000000, 1000000000000]","[38531, 3853]","[38765, 3877]",3853.1,3876.5
2,2025-07-31 11:47:14,3858.37,3858.38,"[10000000000000, 1000000000000]","[38531, 3853]","[38765, 3877]",3853.1,3876.5
3,2025-07-31 11:47:15,3858.37,3858.38,"[10000000000000, 1000000000000]","[38531, 3853]","[38765, 3877]",3853.1,3876.5
4,2025-07-31 11:47:16,3858.37,3858.38,"[10000000000000, 1000000000000]","[38531, 3853]","[38765, 3877]",3853.1,3876.5
...,...,...,...,...,...,...,...,...
3595,2025-07-31 12:47:07,3863.38,3863.39,"[10000000000000, 1000000000000]","[38525, 3852]","[38759, 3876]",3852.5,3875.9
3596,2025-07-31 12:47:08,3863.38,3863.39,"[10000000000000, 1000000000000]","[38525, 3852]","[38759, 3876]",3852.5,3875.9
3597,2025-07-31 12:47:09,3863.48,3863.49,"[10000000000000, 1000000000000]","[38525, 3852]","[38759, 3876]",3852.5,3875.9
3598,2025-07-31 12:47:10,3863.48,3863.49,"[10000000000000, 1000000000000]","[38525, 3852]","[38759, 3876]",3852.5,3875.9


In [25]:
import plotly.express as px

# Melt the four price series into long form (one row per time/series pair)
# so Plotly Express can draw one coloured line per series.
long_df = merged_df.melt(
    id_vars="time",
    value_vars=["best_bid", "best_ask", "bid_0.00001_weth", "ask_0.00001_weth"],
    var_name="Type",
    value_name="Price",
)

# One line (with point markers) per series, coloured by the `Type` column.
fig = px.line(
    long_df,
    x="time",
    y="Price",
    color="Type",
    markers=True,
    title="Best Bids/Asks vs Bid/Ask for 0.00001 WETH",
    labels={"time": "Time", "Price": "Price", "Type": "Legend"},
)

# Grid lines on both axes, HH:MM:SS ticks on the x axis, and a single hover
# box that lists every series at the hovered x position.
fig.update_layout(
    xaxis={"showgrid": True, "tickformat": "%H:%M:%S"},
    yaxis={"showgrid": True},
    hovermode="x unified",
)

# Render the figure.
fig.show()

In [42]:
# Input files read by `main()`: the Binance order-book capture and the
# Unichain per-block quote capture.
# NOTE(review): absolute, machine-specific paths — consider a configurable
# data directory so the notebook runs elsewhere.
BINANCE_CSV   = "/home/tobias/personal-dex-trading/out/data/order_book.csv"
UNICHAIN_CSV  = "/home/tobias/personal-dex-trading/out/data/block_data.csv"

def _top_price(side_str: str) -> float | None:
    """Return the first price in a stringified list of bids/asks."""
    try:
        side = ast.literal_eval(side_str)
        return float(side[0][0]) if side else None
    except Exception:
        return None

# NOTE(review): `load_binance` is also defined in an earlier cell of this
# notebook; consider keeping a single definition.
def load_binance(path: str) -> pd.DataFrame:
    """Load a Binance order-book CSV and keep only the best bid and ask.

    Args:
        path: CSV file containing the columns ``time``, ``bids`` and ``asks``.

    Returns:
        Columns ``time``, ``best_bid``, ``best_ask`` ordered by ``time``.
        ``best_bid``/``best_ask`` come from ``_top_price`` applied to each
        ``bids``/``asks`` cell; ``time`` is left exactly as read.
    """
    book = pd.read_csv(path)  # columns: time, bids, asks
    for side_col, best_col in (("bids", "best_bid"), ("asks", "best_ask")):
        book[best_col] = book[side_col].apply(_top_price)

    top_of_book = book[["time", "best_bid", "best_ask"]]
    return top_of_book.sort_values("time")

def load_unichain(path: str) -> pd.DataFrame:
    """Load Unichain quotes and derive a per-unit price for every input size.

    Args:
        path: CSV file with the columns ``timestamp`` (Unix seconds),
            ``input_value`` and ``output_value``.

    Returns:
        A frame with a ``time`` column, one column per distinct
        ``input_value`` holding the pivoted ``output_value`` (duplicates are
        combined by ``pivot_table``'s default aggregation), and one
        ``uni_px_<input_value>`` column per size holding output / input.
    """
    raw = pd.read_csv(path, index_col="timestamp")
    # seconds -> datetime, shifted +2h to match the Binance timestamps
    raw.index = pd.to_datetime(raw.index, unit="s") + pd.Timedelta(hours=2)

    # Wide layout: one row per timestamp, one column per input size.
    outputs_by_size = (
        raw.reset_index()
        .pivot_table(index="timestamp", columns="input_value", values="output_value")
        .rename_axis(None, axis=1)
        .sort_index()
    )

    # Price per unit of input for each quoted size (the original author
    # describes this as USDC per 1 WETH).
    prices_by_size = pd.concat(
        {
            f"uni_px_{size}": outputs_by_size[size] / size
            for size in outputs_by_size.columns
        },
        axis=1,
    )

    combined = pd.concat([outputs_by_size, prices_by_size], axis=1)
    return combined.reset_index(names="time")

def merge_dfs(df_bin: pd.DataFrame, df_uni: pd.DataFrame) -> pd.DataFrame:
    """Left-join Unichain quotes onto Binance rows by exact timestamp.

    Args:
        df_bin: Binance frame with a ``time`` column (strings or datetimes).
        df_uni: Unichain frame with a ``time`` column (strings or datetimes).

    Returns:
        Every row of ``df_bin`` with the columns of the first ``df_uni`` row
        sharing the same ``time``; Binance rows without a match get NaN in
        the Unichain columns.

    The inputs are not modified: ``time`` is parsed on copies, so a caller's
    frames keep their original column after this call.
    """
    # .assign returns new frames, leaving the caller's objects untouched.
    bin_parsed = df_bin.assign(time=pd.to_datetime(df_bin["time"]))
    uni_parsed = df_uni.assign(time=pd.to_datetime(df_uni["time"]))

    # Keep one Unichain row per timestamp so the left join cannot fan out.
    uni_unique = uni_parsed.drop_duplicates(subset=["time"], keep="first")

    return pd.merge(bin_parsed, uni_unique, on="time", how="left")

def add_arbitrage_cols(df: pd.DataFrame, uni_col: str = "uni_px_0.0001") -> pd.DataFrame:
    """Add two price-difference ("edge") columns between Binance and Unichain.

    A. Long-Binance / Short-Unichain:
         buy WETH on Binance @ best_ask, immediately sell on Unichain.
         ``edge_binance→uni = uni_price - best_ask``
    B. Long-Unichain / Short-Binance:
         buy WETH on Unichain (reverse swap) and sell on Binance @ best_bid.
         Only the forward quote is available, so as a rough proxy the same
         Unichain price is used: ``edge_uni→binance = best_bid - uni_price``
         (not perfect – use the reverse quote if available).

    Args:
        df: Frame with ``best_bid``, ``best_ask`` and the ``uni_col`` column.
        uni_col: Name of the Unichain price column to compare against.
            Defaults to ``"uni_px_0.0001"``, the smallest quoted size, taken
            as representative.

    Returns:
        The same ``df`` object, modified in place with the two new columns.
        A positive value means the corresponding trade direction is
        profitable before any costs.

    Raises:
        KeyError: If ``uni_col``, ``best_bid`` or ``best_ask`` is missing.
    """
    uni_price = df[uni_col]

    df["edge_binance→uni"] = uni_price - df["best_ask"]   # positive ⇒ profit
    df["edge_uni→binance"] = df["best_bid"] - uni_price   # positive ⇒ profit
    return df

def main():
    """Run the pipeline: load both CSVs, join them, and add the edge columns.

    Reads ``BINANCE_CSV`` and ``UNICHAIN_CSV``, joins the two frames with
    ``merge_dfs`` and returns the result of ``add_arbitrage_cols`` on it.
    """
    binance_top = load_binance(BINANCE_CSV)
    unichain_quotes = load_unichain(UNICHAIN_CSV)

    # To persist the result, call
    # `.to_csv("merged_for_analysis.csv", index=False)` on the returned frame.
    return add_arbitrage_cols(merge_dfs(binance_top, unichain_quotes))

In [45]:
# Run the load -> merge -> edge-column pipeline and display the result.
merged = main()
merged

Unnamed: 0,time,best_bid,best_ask,0.0001,0.001,0.01,uni_px_0.0001,uni_px_0.001,uni_px_0.01,edge_binance→uni,edge_uni→binance
0,2025-07-29 16:22:46,3808.20,3808.21,,,,,,,,
1,2025-07-29 16:22:47,3808.10,3808.11,0.379535,3.793543,37.755463,3795.35,3793.543,3775.5463,-12.76,12.75
2,2025-07-29 16:22:48,3807.84,3807.85,0.379535,3.793543,37.755463,3795.35,3793.543,3775.5463,-12.50,12.49
3,2025-07-29 16:22:49,3807.78,3807.79,0.379535,3.793543,37.755463,3795.35,3793.543,3775.5463,-12.44,12.43
4,2025-07-29 16:22:50,3807.79,3807.80,0.379535,3.793543,37.755463,3795.35,3793.543,3775.5463,-12.45,12.44
...,...,...,...,...,...,...,...,...,...,...,...
559,2025-07-29 16:32:05,3818.92,3818.93,0.380312,3.801312,37.832594,3803.12,3801.312,3783.2594,-15.81,15.80
560,2025-07-29 16:32:06,3818.92,3818.93,0.380312,3.801312,37.832594,3803.12,3801.312,3783.2594,-15.81,15.80
561,2025-07-29 16:32:07,3818.92,3818.93,0.380312,3.801312,37.832594,3803.12,3801.312,3783.2594,-15.81,15.80
562,2025-07-29 16:32:08,3819.01,3819.02,0.380312,3.801312,37.832594,3803.12,3801.312,3783.2594,-15.90,15.89
