In [76]:
%load_ext autoreload
%autoreload 2

import numpy as np
from account import Binance
import pandas as pd
import numpy as np
import warnings
from tqdm import tqdm
import cvxpy as cp
from utils.logging import get_logger
from utils.data_helper import *
from utils.db import *
from strategy_v3.Strategy import ExchangeArbitrageStrategy

# Notebook-wide display settings: show at most 50 rows and 50 columns of a
# frame, and render floats with thousands separators and four decimals.
pd.options.display.max_rows = 50
pd.options.display.max_columns = 50
pd.set_option('display.float_format', "{:,.4f}".format)

# Silence every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Experimental arbitrage strategy on Binance

1. Get the bid/ask prices for all active currency pairs on Binance and present them in a matrix $Q$

2. Transform each price to its negative log price, so that a conversion chain A -> B -> C becomes additive: $\log(p_1 \cdot p_2) = \log(p_1) + \log(p_2)$

3. Solve the optimization following the classic Traveling Salesman Problem (TSP) formulation, but without the constraint that every node must be visited exactly once

- We want to find a closed loop where the sum of the path values is negative

- The decision variable $X$ is an $n \times n$ binary matrix ($n$ is the number of assets); $X_{ij} = 1$ represents a trade from currency $i$ to currency $j$

- Minimize $\sum_{i,j} X_{ij} Q_{ij}$ (the element-wise dot product of $X$ and $Q$)

Reference: https://nbviewer.org/github/rcroessmann/sharing_public/blob/master/arbitrage_identification.ipynb


In [86]:
# Run one pass of the exchange-arbitrage strategy and show the trades it proposes.
# zero_fees=True and max_trades=3 are passed straight to the constructor;
# NOTE(review): max_trades presumably caps the number of legs per loop -- confirm
# against ExchangeArbitrageStrategy.
strategy = ExchangeArbitrageStrategy(zero_fees=True, max_trades=3)
# Tag this run with the id "qa".
strategy.set_strategy_id("qa")
# Load the inputs first, then solve; optimize() is called after load_data()
# (presumably it depends on the loaded data -- verify).
strategy.load_data()
strategy.optimize()
# Keep a handle on the resulting trade legs and display them as the cell output.
df_trades = strategy.df_trades
df_trades

[32;20m2025-01-22 01:54:45,852 - 10559 - INFO - zero_fees: True[0m
[32;20m2025-01-22 01:54:45,854 - 10559 - INFO - max_trades: 3[0m
[32;20m2025-01-22 01:54:45,854 - 10559 - INFO - min_pnl: 50bps[0m
[32;20m2025-01-22 01:54:45,855 - 10559 - INFO - {'USDT': 50, 'ETH': 0.01539456264, 'BTC': 0.00052979531}[0m


                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 22 01:54:46 AM: Your problem has 164836 variables, 5 constraints, and 0 parameters.
(CVXPY) Jan 22 01:54:46 AM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 22 01:54:46 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 22 01:54:46 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 22 01:54:46 AM: Compiling problem (target solver=SCIPY).
(CVXPY) Jan 22 01:54:46 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi

[32;20m2025-01-22 01:54:53,572 - qa - INFO - CVXPY - Status: optimal[0m
[32;20m2025-01-22 01:54:53,573 - qa - INFO - CVSPY - Optimal value: -0.0005726527839131634[0m
[32;20m2025-01-22 01:54:53,575 - qa - INFO - Total PNL: -34.24bps[0m
[32;20m2025-01-22 01:54:53,732 - qa - INFO - 
+---------+-----------------+---------------+---------+
|   group |   gross_pnl_bps |   net_pnl_bps |   count |
|---------+-----------------+---------------+---------|
|       1 |         5.72817 |      -34.2422 |       3 |
+---------+-----------------+---------------+---------+[0m
[32;20m2025-01-22 01:54:53,734 - qa - INFO - Net pnl is smaller than min_pnl (50bps). end here.[0m


ALTER TABLE trades ADD COLUMN usd_price DOUBLE


Unnamed: 0,from_asset,to_asset,mkt_price,fee,group,order,mkt_price_w_fee,symbol,status,baseAsset,quoteAsset,stepSize,tickSize,qty_decimal,price_decimal,bidPrice,bidQty,askPrice,askQty,makerCommission,takerCommission,usd_price,side,count,price_time,zero_fees
0,AIXBT,USDT,0.6549,0.001,1,1,0.6542,AIXBTUSDT,TRADING,AIXBT,USDT,0.1,0.0001,1,4,0.6549,165.2,0.655,4409.7,0.001,0.001,0.655,SELL,1,2025-01-22 01:54:45.935954+08:00,True
1,USDT,TRY,35.69,0.0015,1,2,35.6365,USDTTRY,TRADING,USDT,TRY,1.0,0.01,0,2,35.69,612300.0,35.7,2468902.0,0.001,0.0015,1.0,SELL,1,2025-01-22 01:54:45.935954+08:00,True
2,TRY,AIXBT,0.0428,0.0015,1,3,0.0427,AIXBTTRY,TRADING,AIXBT,TRY,0.1,0.01,1,2,23.34,115.8,23.36,83.8,0.001,0.0015,0.655,BUY,1,2025-01-22 01:54:45.935954+08:00,True


# Summary of strategy

### Break down the PnL slippage by
1. price slippage => difference between theoretical price and fill price
    - This can be improved by looking into the order books and deriving a fair market price based on the trade size
    - Or just remove ccys with bid/ask notional smaller than x% of trade size

2. qty slippage => ccy might not be fully executed when baseAsset is not from_asset
    - Could be mitigated by increasing the trade size

3. commission
    - Limit the number of trades in optimization

In [88]:
# Reference price of each starting currency, keyed by currency code.
# USDT is fixed at 1; BTC and ETH use the exchange's average price of their
# <ccy>USDT symbol, fetched in that order.
client = Binance().get_client()
trade_ccy_price = {
    'USDT': 1,
    **{
        ccy: float(client.get_avg_price(symbol=f'{ccy}USDT')['price'])
        for ccy in ('BTC', 'ETH')
    },
}

# Two-column frame (trade_ccy, price) so the prices can be joined in SQL later.
df_trade_ccy_price = pd.DataFrame(list(trade_ccy_price.items()), columns=['trade_ccy', 'price'])
df_trade_ccy_price

Unnamed: 0,trade_ccy,price
0,USDT,1.0
1,BTC,106123.8667
2,ETH,3350.1523


In [93]:
# Summarise the 30 most recent arbitrage candidates stored in the `trades` table.
# `duck` is not defined in this notebook; presumably it comes from
# `from utils.db import *` -- confirm.
#
# One output row per price_time snapshot:
#   - pnl_net / pnl_gross: product of the per-leg prices (with / without fee)
#     minus 1, expressed in basis points (x 10000)
#   - fees: gross minus net, in bps
#   - num_trades: number of legs in the snapshot
#   - ccy: the from_asset of every leg, comma separated
#   - tradable: true when at least one leg starts from USDT, ETH or BTC
# NOTE(review): rows are grouped by price_time only, so several loops sharing a
# snapshot would be multiplied together -- verify that cannot happen.
db = duck("binance_arb")
db.query("""--sql
    select
        price_time::date as "date"
        , epoch(price_time)::int as id
        , strftime(price_time, '%Y-%m-%d %H:%M:%S') as time
        , 10000 * (product(mkt_price_w_fee) - 1) as pnl_net
        , 10000 * (product(mkt_price) - 1) as pnl_gross
        , pnl_gross - pnl_net as fees
        , count(1) as num_trades
        , string_agg(from_asset, ', ') as ccy
        , bool_or(from_asset in ('USDT', 'ETH', 'BTC')) as tradable                
    from trades         
    where 
        1=1                
    group by price_time       
    order by price_time desc    
    limit 30
""")

Unnamed: 0,date,id,time,pnl_net,pnl_gross,fees,num_trades,ccy,tradable
0,2025-01-22,1737508566,2025-01-22 01:16:05,-0.0001,-0.0001,0.0,3,"FDUSD, USDC, USDT",True
1,2025-01-22,1737506946,2025-01-22 00:49:06,0.1137,20.1443,20.0306,3,"BTC, USDT, EURI",True
2,2025-01-21,1737498365,2025-01-21 22:26:05,4.1584,23.6947,19.5363,4,"BTC, USDC, USDT, EURI",True
3,2025-01-21,1737482945,2025-01-21 18:09:05,11.0024,40.5935,29.5911,4,"MXN, XRP, USDC, USDT",True
4,2025-01-21,1737482886,2025-01-21 18:08:05,3.3034,33.3741,30.0707,3,"MXN, XRP, USDT",True
5,2025-01-21,1737482824,2025-01-21 18:07:04,12.8659,42.4621,29.5962,4,"MXN, XRP, USDC, USDT",True
6,2025-01-21,1737482766,2025-01-21 18:06:05,4.2733,34.3467,30.0734,3,"MXN, XRP, USDT",True
7,2025-01-21,1737482707,2025-01-21 18:05:07,7.3637,36.9434,29.5798,4,"MXN, XRP, USDC, USDT",True
8,2025-01-21,1737482414,2025-01-21 18:00:13,9.7703,39.8593,30.089,3,"MXN, XRP, USDT",True
9,2025-01-21,1737482346,2025-01-21 17:59:05,9.7703,39.8593,30.089,3,"MXN, XRP, USDT",True


In [87]:
# Build the per-leg execution table (df_fills) from the raw `orders` table.
# `duck` and `duckdb` are not imported explicitly in this notebook; presumably
# both come from `from utils.db import *` -- confirm.
db = duck("binance_arb")

# Step 1 - raw orders, one row per stored order, enriched with:
#   - id / date / time: integer epoch, calendar date and formatted string of price_time
#   - ccys: distinct from_asset values of the whole (id, group) loop
#   - trade_ccy: from_asset of the first leg (lowest "order") of the loop
#   - fill_price_adj: fill_price when to_asset is the quote asset, otherwise its
#     reciprocal (presumably so every leg is quoted as to_asset per from_asset -- verify)
# NOTE(review): ccys partitions by id while trade_ccy partitions by price_time;
# the two should agree as long as id maps one-to-one to price_time.
df_orders = db.query("""--sql
    select 
        *            
        , epoch(price_time)::int as id
        , price_time::date as "date"        
        , strftime(price_time, '%Y-%m-%d %H:%M:%S') as time
        , string_agg(distinct from_asset, ', ') over (partition by id, "group" order by "order" rows between unbounded preceding and unbounded following) as ccys        
        , first(from_asset) over (partition by price_time, "group" order by "order" rows between unbounded preceding and unbounded following) as trade_ccy
        , case when to_asset = quoteAsset then fill_price else 1/fill_price end as fill_price_adj                             
    from orders  
    where 1 = 1    
""")

# Step 2 - collapse the orders to one row per leg (id, "order"). The query
# refers to the local variable df_orders by name.
#   - theo_px / theo_px_w_fee: average quoted price without / with fee
#   - fill_px: fill_qty-weighted average of fill_price_adj
#   - start_qty: to_qty of the previous leg in the same id (from_qty for the first leg)
#   - residual_qty: start_qty - from_qty, i.e. what was not carried into this leg
#   - px_slippage, qty_slippage, comms: each in bps (x 10000); slippage compounds the three
#   - fills: number of order rows behind the leg
df_fills = duckdb.query("""--sql
    select
        "date"
        , "time"
        , "id"
        , ccys
        , trade_ccy
        , "order"
        , from_asset
        , to_asset                            
        , avg(mkt_price) as theo_px
        , avg(mkt_price_w_fee) as theo_px_w_fee
        , sum(fill_price_adj * fill_qty) / sum(fill_qty) as fill_px        
        , sum(from_asset_qty) as from_qty
        , sum(to_asset_qty) as to_qty                
        , sum(to_asset_comms_qty) as comms_qty
        , ifnull(lag(sum(to_asset_qty)) over (partition by id order by "order"), from_qty) as start_qty
        , start_qty - from_qty as residual_qty
        , 10000 * (fill_px / theo_px - 1) as px_slippage
        , 10000 * (-residual_qty / start_qty) as qty_slippage
        , 10000 * (-comms_qty / (comms_qty + to_qty)) as comms      
        , 10000 * ((1+px_slippage/10000) * (1+qty_slippage/10000) * (1+comms/10000) - 1) as slippage                                                            
        , count(1) as fills        
    from df_orders    
    group by "date", "time", "id", ccys, trade_ccy, "order", from_asset, to_asset
    order by id, "order"
""").to_df()

In [74]:
# Aggregate the per-leg fills (df_fills) to one row per executed arbitrage loop.
# Requires df_fills and df_trade_ccy_price to exist in the kernel.
#
# Columns:
#   - valid: the loop is closed, i.e. the first leg's from_asset equals the
#     last leg's to_asset
#   - fills / trades: number of order fills / number of legs
#   - from_qty / to_qty / net_qty: quantity entering the first leg, quantity
#     leaving the last leg, and their difference (all in trade_ccy)
#   - net_pnl_usd: net_qty valued at the reference price of trade_ccy
#   - realized_pnl, theo_gross_pnl, theo_net_pnl: in bps (x 10000)
#   - px_slippage, qty_slippage, comms: per-leg bps compounded over the loop
#
# first()/last() are order-sensitive aggregates, so every use is ordered
# explicitly by the leg index "order" instead of relying on the row order in
# which df_fills happens to be scanned.
df_fills_agg = duckdb.query("""--sql
    select
        "date"
        , id
        , "time"
        , ccys
        , t1.trade_ccy
        , first(from_asset order by "order") = last(to_asset order by "order") as valid
        , sum(fills)::int as fills
        , count(1) as trades
        , first(from_qty order by "order") as from_qty
        , last(to_qty order by "order") as to_qty
        , last(to_qty order by "order") - first(from_qty order by "order") as net_qty
        , net_qty * avg(price) as net_pnl_usd
        , 10000 * net_qty / first(from_qty order by "order") as realized_pnl
        , 10000 * (product(theo_px) - 1) as theo_gross_pnl
        , 10000 * (product(theo_px_w_fee) - 1) as theo_net_pnl
        , 10000 * (product(1+px_slippage/10000)-1) as px_slippage
        , 10000 * (product(1+qty_slippage/10000)-1) as qty_slippage
        , 10000 * (product(1+comms/10000)-1) as comms

    from df_fills t1
    left join df_trade_ccy_price t2 on t1.trade_ccy = t2.trade_ccy
    group by "date", id, "time", ccys, t1.trade_ccy
    order by time desc
""").to_df()

df_fills_agg

Unnamed: 0,date,id,time,ccys,trade_ccy,valid,fills,trades,from_qty,to_qty,net_qty,net_pnl_usd,realized_pnl,theo_gross_pnl,theo_net_pnl,px_slippage,qty_slippage,comms
0,2025-01-21,1737461405,2025-01-21 12:10:05,"USDT, IOTX, JPY, BTC",USDT,True,7,4,49.9814,50.1474,0.1659,0.1659,33.1968,97.0209,56.6943,-6.0647,-17.3306,-39.9197
1,2025-01-21,1737451984,2025-01-21 09:33:04,"USDT, XRP, MXN",USDT,True,5,3,49.4,46.953,-2.447,-2.447,-495.3445,84.945,54.7214,-316.3797,-238.2282,-29.97
2,2025-01-21,1737431705,2025-01-21 03:55:05,"USDT, USDC, TRUMP",USDT,True,5,3,49.0539,48.851,-0.2029,-0.2029,-41.3712,178.3774,158.5394,-185.8366,-11.1613,-19.4905
3,2025-01-21,1737427205,2025-01-21 02:40:05,"ETH, IOTX, JPY",ETH,True,5,3,0.0154,0.0155,0.0001,0.3146,61.6712,89.4338,59.196,9.3859,-6.916,-29.97
4,2025-01-21,1737423786,2025-01-21 01:43:06,"USDT, EURI, BTC, HIVE",USDT,True,4,4,49.9248,48.2046,-1.7202,-1.7202,-344.549,103.9847,73.7024,-137.8807,-281.1878,-29.97
5,2025-01-21,1737418386,2025-01-21 00:13:05,"BTC, USDC, PEPE, JPY",BTC,True,5,4,0.0005,0.0005,-0.0,-0.1571,-29.2303,99.0738,59.7451,-74.3081,-14.2697,-38.9182
6,2025-01-20,1737387365,2025-01-20 15:36:04,"BTC, BRL, PEPE, JPY",BTC,True,10,4,0.0005,0.0005,-0.0,-0.1571,-29.2304,90.5819,50.28,-63.8951,-15.4549,-39.7998
7,2025-01-20,1737387246,2025-01-20 15:34:05,"BTC, USDC, PEPE, JPY",BTC,True,9,4,0.0005,0.0005,0.0,0.1529,28.4613,108.2882,68.9236,-33.8029,-6.4423,-38.9048
8,2025-01-20,1737370144,2025-01-20 10:49:04,"USDT, RON, EGLD",USDT,True,7,3,50.0,48.7452,-1.2548,-1.2548,-250.959,126.2664,95.9174,-270.7469,-74.8605,-29.97
9,2025-01-20,1737352867,2025-01-20 06:01:06,"USDT, SNT, BTC, EURI",USDT,True,9,4,49.9727,48.5708,-1.4019,-1.4019,-280.5365,90.1658,59.9257,-215.7623,-125.2798,-30.067


In [75]:
# Drill into the individual legs of one loop, selected by its epoch id.
df_fills.loc[df_fills['id'].eq(1737418386)]

Unnamed: 0,date,time,id,ccys,trade_ccy,order,from_asset,to_asset,theo_px,theo_px_w_fee,fill_px,from_qty,to_qty,comms_qty,start_qty,residual_qty,px_slippage,qty_slippage,comms,slippage,fills
100,2025-01-21,2025-01-21 00:13:05,1737418386,"BTC, USDC, PEPE, JPY",BTC,1,BTC,USDC,104449.5781,104350.3516,104488.9007,0.0005,54.2826,0.0516,0.0005,0.0,3.7647,-0.0,-9.5,-5.7388,1
101,2025-01-21,2025-01-21 00:13:05,1737418386,"BTC, USDC, PEPE, JPY",BTC,2,USDC,PEPE,63251.1055,63191.0195,63211.1265,54.2826,3428004.25,3259.7,54.2826,0.0,-6.3207,-0.0028,-9.5,-15.8175,1
102,2025-01-21,2025-01-21 00:13:05,1737418386,"BTC, USDC, PEPE, JPY",BTC,3,PEPE,JPY,0.0025,0.0025,0.0025,3428004.0,8482.9828,8.4539,3428004.25,0.25,-71.8271,-0.0007,-9.9558,-81.7121,2
103,2025-01-21,2025-01-21 00:13:05,1737418386,"BTC, USDC, PEPE, JPY",BTC,4,JPY,BTC,0.0,0.0,0.0,8470.8809,0.0005,0.0,8482.9828,12.1019,0.0594,-14.2661,-10.0193,-24.2119,1


# Trade the arbitrage pair(s)
- Trade the pairs with highest pnl
- The arbitrage loop starts with a currency that is already held in the current portfolio

- Open question: are commissions included in the quote quantity?

In [5]:
# Quantity of each currency to start an arbitrage loop with, keyed by currency code.
trade_currency = dict(
    USDT=50,
    ETH=0.01539456264,
    BTC=0.00052979531,
)

In [21]:
# Fetch the account snapshot and keep only the assets with a positive free balance.
client = Binance().client
account_snapshot = client.get_account()
balance = (
    pd.DataFrame(account_snapshot['balances'])
    # cast both quantity columns to float so they can be compared numerically
    .astype({'free': float, 'locked': float})
    .loc[lambda frame: frame['free'] > 0]
)
balance

Unnamed: 0,asset,free,locked
0,BTC,0.0983,0.0
2,ETH,1.8632,0.0
4,BNB,0.0,0.0
7,SNT,0.813,0.0
11,USDT,9205.1128,0.0
22,TRX,0.0821,0.0
57,VIB,0.316,0.0
61,XRP,2.6387,0.0
86,ADA,0.0734,0.0
88,XLM,99.9,0.0
