In [3]:
%load_ext autoreload
%autoreload 2

import numpy as np
from account.Binance import Binance
import pandas as pd
import numpy as np
import warnings
from tqdm import tqdm
import cvxpy as cp
from utils.logging import get_logger
from utils.data_helper import *
from utils.db import *
from strategy_v3.Strategy import ExchangeArbitrageStrategy

pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)
pd.options.display.float_format = "{:,.4f}".format
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Expermental Arbitrage strategy in Binance

1. Get bid/ask price for all active currency pairs in Binance and presents in a matrix $Q$

2. Transform the price to negative log price (Given converison from currency A->B->C = log(p1*p2) ~= log(p1) + log(p2))

3. Solve the optimization follow classic Traveling Salesmen Problem (TSP), but removing the constraints that all nodes needs to be visited once

- We want to find a closed loop where the sum of path values are negative

- Input $X$ is the nxn binary matrix (n is number of assets), 1 represents trade from currency x -> y

- Minimize $X$ dot $Q$

Reference: https://nbviewer.org/github/rcroessmann/sharing_public/blob/master/arbitrage_identification.ipynb


In [2]:
strategy = ExchangeArbitrageStrategy(zero_fees=False, max_trades=5, bid_ask_min=2, min_pnl=0)
strategy.set_strategy_id("qa")
strategy.load_data()
strategy.optimize()
#df_trades = strategy.df_trades
#df_trades

[32;20m2025-02-03 02:53:48,104 - 739184 - INFO - trade_size: 50[0m
[32;20m2025-02-03 02:53:48,111 - 739184 - INFO - bid_ask_min: 2 ($100)[0m
[32;20m2025-02-03 02:53:48,112 - 739184 - INFO - zero_fees: False[0m
[32;20m2025-02-03 02:53:48,130 - 739184 - INFO - max_trades: 5[0m
[32;20m2025-02-03 02:53:48,137 - 739184 - INFO - min_pnl: 0bps[0m
[32;20m2025-02-03 02:53:48,142 - 739184 - INFO - is_execute: False[0m
[32;20m2025-02-03 02:53:48,330 - qa - INFO - {'USDT': 50, 'ETH': 0.0005149006851834908, 'BTC': 0.017065108508492452}[0m
[32;20m2025-02-03 02:53:48,434 - qa - INFO - Removed 104 out of 1338 symbols given both bid/ask are non-tradable <$100[0m
[32;20m2025-02-03 02:53:48,512 - qa - INFO - 346 bid/ask quotes are skipped given size <$100.[0m


                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Feb 03 02:53:48 AM: Your problem has 152881 variables, 5 constraints, and 0 parameters.
(CVXPY) Feb 03 02:53:48 AM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Feb 03 02:53:48 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Feb 03 02:53:48 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Feb 03 02:53:48 AM: Compiling problem (target solver=SCIPY).
(CVXPY) Feb 03 02:53:48 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi

[32;20m2025-02-03 02:53:54,916 - qa - INFO - CVXPY - Status: optimal[0m
[32;20m2025-02-03 02:53:54,917 - qa - INFO - CVXPY - Optimal value: 0.0[0m
[32;20m2025-02-03 02:53:54,924 - qa - INFO - Total PNL: 0.00bps[0m
[32;20m2025-02-03 02:53:54,934 - qa - INFO - No optimal trades found, end here.[0m


False

In [190]:
# duck("binance_arb_zero_fees", read_only=False).query(f"delete from trades where price_time::date <= '{datetime.today() - BDay(0):%Y-%m-%d}'")

Unnamed: 0,Count
0,27563


# Summary of strategy

###  breakdown the pnl slippage by
1. price slippage => difference between theoretical price and fill price
    - This can be improved by looking into order books and derive a fair market price based on trade size
    - Or just remove ccys with bid/ask notional smaller than x% of trade size

2. qty slippage => ccy might not be fully executed when baseAsset is not from_asset
    - Could mitigate by increase the trade size

3. commission
    - Limit the number of trades in optimization

In [4]:
df_trade_ccy_price = pd.DataFrame({
    'trade_ccy': ['USDT', 'ETH', 'BTC'], 
    'price': [1, float(Binance().client.get_avg_price(symbol="ETHUSDT")['price']), float(Binance().client.get_avg_price(symbol="BTCUSDT")['price'])]}
)

In [6]:
db = duck("binance_arb_zero_fees")
db.query("""--sql
    select
        price_time::date as "date"
        , epoch(price_time)::int as id
        , strftime(price_time, '%Y-%m-%d %H:%M:%S') as time
        , 10000 * (product(mkt_price_w_fee) - 1) as pnl_net
        , 10000 * (product(mkt_price) - 1) as pnl_gross
        , pnl_gross - pnl_net as fees
        , count(1) as num_trades
        , string_agg(from_asset, ', ') as ccy
        , bool_or(from_asset in ('USDT', 'ETH', 'BTC')) as tradable                       
    from trades         
    where 
        1=1                
    group by price_time      
    order by price_time desc    
    limit 20
""")

Unnamed: 0,date,id,time,pnl_net,pnl_gross,fees,num_trades,ccy,tradable
0,2025-02-04,1738632962,2025-02-04 01:36:01,0.8078,30.8691,30.0614,3,"ETH, PLN, USDT",True
1,2025-02-04,1738628660,2025-02-04 00:24:20,12.6425,42.7409,30.0984,3,"EGLD, USDT, RON",True
2,2025-02-04,1738628546,2025-02-04 00:22:25,3.4399,33.5102,30.0703,3,"EGLD, USDT, RON",True
3,2025-02-03,1738611555,2025-02-03 19:39:15,12.3516,41.9455,29.5939,5,"ETH, UAH, USDT, FDUSD, USDC",True
4,2025-02-03,1738611526,2025-02-03 19:38:46,10.7029,40.7941,30.0912,3,"ETH, UAH, USDT",True
5,2025-02-03,1738611502,2025-02-03 19:38:22,4.8825,34.9571,30.0746,5,"ETH, UAH, USDT, EURI, EUR",True
6,2025-02-03,1738609756,2025-02-03 19:09:16,14.0548,44.1579,30.1031,3,"ETH, UAH, USDT",True
7,2025-02-03,1738609710,2025-02-03 19:08:30,24.5587,54.6924,30.1337,3,"ETH, UAH, USDT",True
8,2025-02-03,1738605254,2025-02-03 17:54:14,4.2489,34.3224,30.0736,3,"BTC, MXN, USDT",True
9,2025-02-03,1738602733,2025-02-03 17:12:12,12.7316,42.8295,30.0979,3,"TRX, WIN, USDT",True


In [7]:
db = duck("binance_arb")
db.query("""--sql
    select
        price_time::date as "date"
        , epoch(price_time)::int as id
        , strftime(price_time, '%Y-%m-%d %H:%M:%S') as time
        , 10000 * (product(mkt_price_w_fee) - 1) as pnl_net
        , 10000 * (product(mkt_price) - 1) as pnl_gross
        , pnl_gross - pnl_net as fees
        , count(1) as num_trades
        , string_agg(from_asset, ', ') as ccy
        , bool_or(from_asset in ('USDT', 'ETH', 'BTC')) as tradable                       
    from trades         
    where 
        1=1                
    group by price_time       
    order by price_time desc    
    limit 20
""")

Unnamed: 0,date,id,time,pnl_net,pnl_gross,fees,num_trades,ccy,tradable
0,2025-02-03,1738578668,2025-02-03 10:31:07,0.2833,40.3848,40.1015,4,"BTC, USDT, SHIB, JPY",True
1,2025-02-03,1738578625,2025-02-03 10:30:24,30.1942,70.4153,40.2211,4,"BTC, FDUSD, SHIB, JPY",True
2,2025-02-03,1738577249,2025-02-03 10:07:28,113.2836,152.8221,39.5385,4,"BCH, USDC, DYDX, BTC",True
3,2025-01-29,1738164126,2025-01-29 15:22:05,0.0034,0.0034,0.0,3,"FDUSD, USDT, USDC",True
4,2025-01-28,1738101186,2025-01-28 21:53:05,-0.0007,-0.0007,0.0,3,"FDUSD, USDC, USDT",True
5,2025-01-28,1738067766,2025-01-28 12:36:06,0.0003,0.0003,0.0,3,"FDUSD, USDC, USDT",True
6,2025-01-28,1738067165,2025-01-28 12:26:04,0.0003,0.0003,0.0,3,"FDUSD, USDC, USDT",True
7,2025-01-28,1738065005,2025-01-28 11:50:05,-0.0001,-0.0001,0.0,3,"FDUSD, USDC, USDT",True
8,2025-01-28,1738055046,2025-01-28 09:04:05,10.9597,51.1041,40.1444,4,"FDUSD, SOL, USDT, PENGU",True
9,2025-01-28,1738030805,2025-01-28 02:20:04,0.004,0.004,0.0,3,"FDUSD, USDT, USDC",True


In [8]:
db = duck("binance_arb")

# get raw orders
df_orders = db.query("""--sql
    select 
        *            
        , epoch(price_time)::int as id
        , price_time::date as "date"        
        , strftime(price_time, '%Y-%m-%d %H:%M:%S') as time
        , string_agg(distinct from_asset, ', ') over (partition by id, "group" order by "order" rows between unbounded preceding and unbounded following) as ccys        
        , first(from_asset) over (partition by price_time, "group" order by "order" rows between unbounded preceding and unbounded following) as trade_ccy
        , case when to_asset = quoteAsset then fill_price else 1/fill_price end as fill_price_adj                             
    from orders  
    where 1 = 1    
""")

# aggregate Orders to trade level
df_fills = duckdb.query("""--sql
    select
        "date"
        , "time"
        , "id"
        , ccys
        , trade_ccy
        , "order"
        , symbol
        , from_asset
        , to_asset                            
        , avg(mkt_price) as theo_px
        , avg(mkt_price_w_fee) as theo_px_w_fee
        , sum(fill_price_adj * fill_qty) / sum(fill_qty) as fill_px        
        , sum(from_asset_qty) as from_qty
        , sum(to_asset_qty) as to_qty                
        , sum(to_asset_comms_qty) as comms_qty
        , ifnull(lag(sum(to_asset_qty)) over (partition by id order by "order"), from_qty) as start_qty
        , start_qty - from_qty as residual_qty
        , 10000 * (fill_px / theo_px - 1) as px_slippage
        , 10000 * (-residual_qty / start_qty) as qty_slippage
        , 10000 * (-comms_qty / (comms_qty + to_qty)) as comms      
        , 10000 * ((1+px_slippage/10000) * (1+qty_slippage/10000) * (1+comms/10000) - 1) as slippage                                                            
        , count(1) as fills        
    from df_orders    
    group by "date", "time", "id", ccys, trade_ccy, "order", from_asset, to_asset, symbol
    order by id, "order"
""").to_df()

In [9]:
df_fills_agg = duckdb.query("""--sql
    select
        "date"
        , id
        , "time"
        , ccys
        , t1.trade_ccy
        , first(from_asset) = last(to_asset) as valid
        , sum(fills)::int as fills
        , count(1) as trades        
        , first(from_qty) as from_qty
        , last(to_qty) as to_qty
        , last(to_qty) - first(from_qty) as net_qty
        , net_qty * avg(price) as net_pnl_usd 
        , 10000 * net_qty / first(from_qty) as realized_pnl
        , 10000 * (product(theo_px) - 1) as theo_gross_pnl
        , 10000 * (product(theo_px_w_fee) - 1) as theo_net_pnl                            
        , 10000 * (product(1+px_slippage/10000)-1) as px_slippage
        , 10000 * (product(1+qty_slippage/10000)-1) as qty_slippage
        , 10000 * (product(1+comms/10000)-1) as comms                 

    from df_fills t1
    left join df_trade_ccy_price t2 on t1.trade_ccy = t2.trade_ccy
    group by "date", id, "time", ccys, t1.trade_ccy 
    order by time desc
""").to_df()

df_fills_agg

Unnamed: 0,date,id,time,ccys,trade_ccy,valid,fills,trades,from_qty,to_qty,net_qty,net_pnl_usd,realized_pnl,theo_gross_pnl,theo_net_pnl,px_slippage,qty_slippage,comms
0,2025-02-03,1738578625,2025-02-03 10:30:24,"BTC, FDUSD, SHIB, JPY",BTC,True,15,4,0.0203,0.0203,-0.0,-2.4832,-12.4654,70.4153,30.1942,-42.2341,-0.2948,-39.9425
1,2025-02-03,1738577249,2025-02-03 10:07:28,"BTC, BCH, USDC, DYDX",BTC,True,62,4,0.0215,0.0172,-0.0044,-428.0604,-2024.9112,152.8221,113.2836,-1790.3484,-0.0773,-38.9428
2,2025-01-21,1737461405,2025-01-21 12:10:05,"USDT, IOTX, JPY, BTC",USDT,True,7,4,49.9814,50.1474,0.1659,0.1659,33.1968,97.0209,56.6943,-6.0647,-17.3306,-39.9197
3,2025-01-21,1737451984,2025-01-21 09:33:04,"USDT, XRP, MXN",USDT,True,5,3,49.4,46.953,-2.447,-2.447,-495.3445,84.945,54.7214,-316.3797,-238.2282,-29.97
4,2025-01-21,1737431705,2025-01-21 03:55:05,"USDT, USDC, TRUMP",USDT,True,5,3,49.0539,48.851,-0.2029,-0.2029,-41.3712,178.3774,158.5394,-185.8366,-11.1613,-19.4905
5,2025-01-21,1737427205,2025-01-21 02:40:05,"ETH, IOTX, JPY",ETH,True,5,3,0.0154,0.0155,0.0001,0.2544,61.6712,89.4338,59.196,9.3859,-6.916,-29.97
6,2025-01-21,1737423786,2025-01-21 01:43:06,"USDT, EURI, BTC, HIVE",USDT,True,4,4,49.9248,48.2046,-1.7202,-1.7202,-344.549,103.9847,73.7024,-137.8807,-281.1878,-29.97
7,2025-01-21,1737418386,2025-01-21 00:13:05,"BTC, USDC, PEPE, JPY",BTC,True,5,4,0.0005,0.0005,-0.0,-0.1493,-29.2303,99.0738,59.7451,-74.3081,-14.2697,-38.9182
8,2025-01-20,1737387365,2025-01-20 15:36:04,"BTC, BRL, PEPE, JPY",BTC,True,10,4,0.0005,0.0005,-0.0,-0.1493,-29.2304,90.5819,50.28,-63.8951,-15.4549,-39.7998
9,2025-01-20,1737387246,2025-01-20 15:34:05,"BTC, USDC, PEPE, JPY",BTC,True,9,4,0.0005,0.0005,0.0,0.1454,28.4613,108.2882,68.9236,-33.8029,-6.4423,-38.9048


In [13]:
df_fills[df_fills['id'] == 1738577249]

Unnamed: 0,date,time,id,ccys,trade_ccy,order,symbol,from_asset,to_asset,theo_px,theo_px_w_fee,fill_px,from_qty,to_qty,comms_qty,start_qty,residual_qty,px_slippage,qty_slippage,comms,slippage,fills
121,2025-02-03,2025-02-03 10:07:28,1738577249,"BTC, BCH, USDC, DYDX",BTC,1,BCHBTC,BTC,BCH,322.3727,322.0503,324.1491,0.0215,6.969,0.007,0.0215,0.0,55.1059,-0.0,-10.0,45.0508,1
122,2025-02-03,2025-02-03 10:07:28,1738577249,"BTC, BCH, USDC, DYDX",BTC,2,BCHUSDC,BCH,USDC,285.0,284.7292,280.0761,6.969,1949.9962,1.8543,6.969,0.0,-172.7676,-0.0348,-9.5,-182.1377,6
123,2025-02-03,2025-02-03 10:07:28,1738577249,"BTC, BCH, USDC, DYDX",BTC,3,DYDXUSDC,USDC,DYDX,1.6518,1.6502,1.4122,1949.9932,2642.6571,2.5129,1949.9962,0.0031,-1450.4946,-0.0157,-9.5,-1458.63,52
124,2025-02-03,2025-02-03 10:07:28,1738577249,"BTC, BCH, USDC, DYDX",BTC,4,DYDXBTC,DYDX,BTC,0.0,0.0,0.0,2642.65,0.0172,0.0,2642.6571,0.0071,-282.2468,-0.0268,-9.9998,-291.9904,3


# Trade the arbitrage pair(s)
- Trade the pairs with highest pnl
- The arbitrage loop starts with one existing currency in current portfolios

- commission are included in quote quantity?

In [97]:
trade_currency = {
    'USDT': 50,
    'ETH':  0.01539456264,
    'BTC': 0.00052979531,
}

In [11]:
client = Binance().client
balance = client.get_account()
balance = pd.DataFrame(balance['balances'])
balance['free'] = balance['free'].astype(float)
balance['locked'] = balance['locked'].astype(float)
balance = balance[balance['free'] > 0]
balance

Unnamed: 0,asset,free,locked
0,BTC,0.0938,0.0
2,ETH,1.8625,0.0
4,BNB,0.0,0.0
7,SNT,0.813,0.0
11,USDT,9224.2095,0.0
22,TRX,0.0821,0.0
57,VIB,0.316,0.0
61,XRP,2.7227,0.0
86,ADA,0.0734,0.0
88,XLM,99.9,0.0
