In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
from account import Binance
import pandas as pd
import numpy as np
import warnings
from tqdm import tqdm
import cvxpy as cp
from utils.logging import get_logger
from utils.data_helper import *
from utils.db import *
from strategy_v3.Strategy import ExchangeArbitrageStrategy

pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)
pd.options.display.float_format = "{:,.4f}".format
warnings.filterwarnings('ignore')

# Expermental Arbitrage strategy in Binance

1. Get bid/ask price for all active currency pairs in Binance and presents in a matrix $Q$

2. Transform the price to negative log price (Given converison from currency A->B->C = log(p1*p2) ~= log(p1) + log(p2))

3. Solve the optimization follow classic Traveling Salesmen Problem (TSP), but removing the constraints that all nodes needs to be visited once

- We want to find a closed loop where the sum of path values are negative

- Input $X$ is the nxn binary matrix (n is number of assets), 1 represents trade from currency x -> y

- Minimize $X$ dot $Q$

Reference: https://nbviewer.org/github/rcroessmann/sharing_public/blob/master/arbitrage_identification.ipynb


In [7]:
strategy = ExchangeArbitrageStrategy(zero_fees=True, trades_num=3)
strategy.set_strategy_id("qa")
strategy.load_data()
strategy.optimize()
df_trades = strategy.df_trades
df_trades

[32;20m2025-01-17 03:22:52,121 - 830098 - INFO - {'USDT': 50, 'ETH': 0.01539456264, 'BTC': 0.00052979531}[0m


                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 17 03:22:52 AM: Your problem has 163216 variables, 5 constraints, and 0 parameters.
(CVXPY) Jan 17 03:22:52 AM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 17 03:22:52 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 17 03:22:52 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 17 03:22:52 AM: Compiling problem (target solver=SCIPY).
(CVXPY) Jan 17 03:22:52 AM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi

[32;20m2025-01-17 03:22:57,810 - qa - INFO - CVXPY - Status: optimal[0m
[32;20m2025-01-17 03:22:57,810 - qa - INFO - CVSPY - Optimal value: -0.0012980190073452746[0m
[32;20m2025-01-17 03:22:57,812 - qa - INFO - Total PNL: -0.1702%[0m
[32;20m2025-01-17 03:22:58,054 - qa - INFO - 
+---------+--------------+------------+---------+
|   group |   gross_pnl% |   net_pnl% |   count |
|---------+--------------+------------+---------|
|       1 |     0.129886 |  -0.170203 |       3 |
+---------+--------------+------------+---------+[0m
[32;20m2025-01-17 03:22:58,055 - qa - INFO - Net pnl is too small, end here.[0m


Unnamed: 0,from_asset,to_asset,mkt_price,fee,group,order,mkt_price_w_fee,symbol,status,baseAsset,quoteAsset,stepSize,tickSize,qty_decimal,price_decimal,bidPrice,bidQty,askPrice,askQty,makerCommission,takerCommission,side,count,price_time,zero_fees
0,ETH,IOTX,86956.5217,0.001,1,1,86869.5652,IOTXETH,TRADING,IOTX,ETH,1.0,0.0,0,8,0.0,6767.0,0.0,138.0,0.001,0.001,BUY,1,2025-01-17 03:22:52.209780+08:00,True
1,IOTX,JPY,5.951,0.001,1,2,5.945,IOTXJPY,TRADING,IOTX,JPY,1.0,0.001,0,3,5.951,6245.0,6.021,77727.0,0.0,0.001,SELL,1,2025-01-17 03:22:52.209780+08:00,True
2,JPY,ETH,0.0,0.001,1,3,0.0,ETHJPY,TRADING,ETH,JPY,0.0,1.0,5,0,516641.0,0.1,516807.0,0.2814,0.001,0.001,BUY,1,2025-01-17 03:22:52.209780+08:00,True


# Summary of strategy

###  breakdown the pnl slippage by
1. price slippage => difference between theoretical price and fill price
    - This can be improved by looking into order books and derive a fair market price based on trade size
    - Or just remove ccys with bid/ask notional smaller than x% of trade size

2. qty slippage => ccy might not be fully executed when baseAsset is not from_asset
    - Could mitigate by increase the trade size

3. commission
    - Limit the number of trades in optimization

In [265]:
client = Binance().get_client()
trade_ccy_price = {
    'USDT': 1,
    'BTC': float(client.get_avg_price(symbol='BTCUSDT')['price']),
    'ETH': float(client.get_avg_price(symbol='ETHUSDT')['price']),
}

df_trade_ccy_price = pd.DataFrame([x for x in trade_ccy_price.items()], columns=['trade_ccy', 'price'])
df_trade_ccy_price

Unnamed: 0,trade_ccy,price
0,USDT,1.0
1,BTC,104190.4751
2,ETH,3282.1077


In [276]:
db = duck("binance_arb")
db.query("""--sql
    select
        price_time::date as "date"
        , epoch(price_time)::int as id
        , strftime(price_time, '%Y-%m-%d %H:%m:%d') as time
        , 10000 * (product(mkt_price_w_fee) - 1) as pnl_net
        , 10000 * (product(mkt_price) - 1) as pnl_gross
        , pnl_gross - pnl_net as fees
        , count(1) as num_trades
        , string_agg(from_asset, ', ') as ccy
        , bool_or(from_asset in ('USDT', 'ETH', 'BTC')) as tradable        
    from trades         
    where 
        1=1                
    group by price_time       
    order by price_time desc    
    limit 20
""")

Unnamed: 0,date,id,time,pnl_net,pnl_gross,fees,num_trades,ccy,tradable
0,2025-01-19,1737248410,2025-01-19 01:01:19,18.5336,58.7088,40.1752,4,"BTC, USDT, SEI, JPY",True
1,2025-01-19,1737248345,2025-01-19 00:01:19,6.9799,37.0611,30.0811,3,"BTC, SEI, JPY",True
2,2025-01-19,1737245825,2025-01-19 00:01:19,34.0212,73.2493,39.228,4,"BTC, USDC, PEPE, JPY",True
3,2025-01-18,1737241927,2025-01-18 23:01:18,60.8592,91.1023,30.2431,4,"BTC, RON, USDT, TUSD",True
4,2025-01-18,1737235205,2025-01-18 21:01:18,20.5143,60.6966,40.1823,4,"BTC, JPY, IOTX, USDT",True
5,2025-01-18,1737232208,2025-01-18 20:01:18,8.0333,38.1174,30.0841,3,"BNB, SEI, JPY",False
6,2025-01-18,1737232147,2025-01-18 20:01:18,8.0333,38.1174,30.0841,3,"BNB, SEI, JPY",False
7,2025-01-18,1737231545,2025-01-18 20:01:18,6.9104,47.0378,40.1274,4,"BTC, USDT, IOTX, JPY",True
8,2025-01-18,1737231065,2025-01-18 20:01:18,14.0733,54.2292,40.1559,4,"BTC, USDT, IOTX, JPY",True
9,2025-01-18,1737230586,2025-01-18 20:01:18,6.099,46.2234,40.1244,4,"IOTX, JPY, SEI, USDT",True


In [272]:
db = duck("binance_arb")

# get raw orders
df_orders = db.query("""--sql
    select 
        *            
        , epoch(price_time)::int as id
        , price_time::date as "date"        
        , strftime(price_time, '%Y-%m-%d %H:%m:%d') as time
        , string_agg(distinct from_asset, ', ') over (partition by id, "group" order by "order" rows between unbounded preceding and unbounded following) as ccys        
        , first(from_asset) over (partition by price_time, "group" order by "order" rows between unbounded preceding and unbounded following) as trade_ccy
        , case when to_asset = quoteAsset then fill_price else 1/fill_price end as fill_price_adj                             
    from orders  
    where 1 = 1    
""")

# aggregate Orders to trade level
df_fills = duckdb.query("""--sql
    select
        "date"
        , "time"
        , "id"
        , ccys
        , trade_ccy
        , "order"
        , from_asset
        , to_asset                            
        , avg(mkt_price) as theo_px
        , avg(mkt_price_w_fee) as theo_px_w_fee
        , sum(fill_price_adj * fill_qty) / sum(fill_qty) as fill_px        
        , sum(from_asset_qty) as from_qty
        , sum(to_asset_qty) as to_qty                
        , sum(to_asset_comms_qty) as comms_qty
        , ifnull(lag(sum(to_asset_qty)) over (partition by id order by "order"), from_qty) as start_qty
        , start_qty - from_qty as residual_qty
        , 10000 * (fill_px / theo_px - 1) as px_slippage
        , 10000 * (-residual_qty / start_qty) as qty_slippage
        , 10000 * (-comms_qty / (comms_qty + to_qty)) as comms      
        , 10000 * ((1+px_slippage/10000) * (1+qty_slippage/10000) * (1+comms/10000) - 1) as slippage                                                            
        , count(1) as fills        
    from df_orders    
    group by "date", "time", "id", ccys, trade_ccy, "order", from_asset, to_asset
    order by id, "order"
""").to_df()

In [287]:
df_fills_agg = duckdb.query("""--sql
    select
        "date"
        , id
        , "time"
        , ccys
        , t1.trade_ccy
        , first(from_asset) = last(to_asset) as valid
        , sum(fills)::int as fills
        , count(1) as trades        
        , first(from_qty) as from_qty
        , last(to_qty) as to_qty
        , last(to_qty) - first(from_qty) as net_qty
        , net_qty * avg(price) as net_pnl_usd 
        , 10000 * net_qty / first(from_qty) as realized_pnl
        , 10000 * (product(theo_px) - 1) as theo_gross_pnl
        , 10000 * (product(theo_px_w_fee) - 1) as theo_net_pnl                            
        , 10000 * (product(1+px_slippage/10000)-1) as px_slippage
        , 10000 * (product(1+qty_slippage/10000)-1) as qty_slippage
        , 10000 * (product(1+comms/10000)-1) as comms                 

    from df_fills t1
    left join df_trade_ccy_price t2 on t1.trade_ccy = t2.trade_ccy
    group by "date", id, "time", ccys, t1.trade_ccy 
    order by time desc
""").to_df()

df_fills_agg

Unnamed: 0,date,id,time,ccys,trade_ccy,valid,fills,trades,from_qty,to_qty,net_qty,net_pnl_usd,realized_pnl,theo_gross_pnl,theo_net_pnl,px_slippage,qty_slippage,comms
0,2025-01-19,1737248410,2025-01-19 01:01:19,"USDT, SEI, JPY, BTC",USDT,True,6,4,49.9746,49.5423,-0.4323,-0.4323,-86.5132,58.7088,18.5336,-25.2761,-79.7996,-39.9193
1,2025-01-19,1737245825,2025-01-19 00:01:19,"BTC, USDC, PEPE, JPY",BTC,True,4,4,0.0005,0.0005,0.0,0.1542,28.4613,73.2493,34.0212,-0.5106,-5.0676,-38.9048
2,2025-01-18,1737241927,2025-01-18 23:01:18,"USDT, TUSD, BTC, RON",USDT,True,9,4,49.0273,46.953,-2.0743,-2.0743,-423.091,91.1023,60.8592,-94.7022,-390.013,-29.97
3,2025-01-18,1737235205,2025-01-18 21:01:18,"USDT, BTC, JPY, IOTX",USDT,True,6,4,49.6172,49.2867,-0.3305,-0.3305,-66.6156,60.6966,20.5143,-67.7872,-19.1893,-39.94
4,2025-01-18,1737231065,2025-01-18 20:01:18,"USDT, IOTX, JPY, BTC",USDT,True,5,4,49.9615,49.7255,-0.2361,-0.2361,-47.2493,54.2292,14.0733,-21.6124,-39.7509,-39.8987
5,2025-01-18,1737231545,2025-01-18 20:01:18,"USDT, IOTX, JPY, BTC",USDT,True,4,4,49.9725,49.7516,-0.221,-0.221,-44.216,47.0378,6.9104,0.948,-52.0762,-39.8987
6,2025-01-18,1737230586,2025-01-18 20:01:18,"USDT, IOTX, JPY, SEI",USDT,True,5,4,49.9741,49.4828,-0.4912,-0.4912,-98.2915,46.2234,6.099,-13.6531,-90.7978,-39.94
7,2025-01-18,1737225605,2025-01-18 18:01:18,"BTC, USDC, SEI, JPY",BTC,True,6,4,0.0005,0.0005,-0.0,-0.4709,-86.9231,44.0829,4.9687,-31.8597,-63.719,-35.3919
8,2025-01-18,1737220207,2025-01-18 17:01:18,"USDT, EGLD, RON",USDT,True,3,3,49.686,48.951,-0.735,-0.735,-147.9291,64.8716,34.7069,-112.4331,-70.3627,-29.97
9,2025-01-18,1737208925,2025-01-18 14:01:18,"USDT, BTC, JPY, IOTX",USDT,True,5,4,49.44,49.2913,-0.1487,-0.1487,-30.0685,43.5668,3.4529,-43.2865,-2.9881,-27.0913


# Trade the arbitrage pair(s)
- Trade the pairs with highest pnl
- The arbitrage loop starts with one existing currency in current portfolios

- commission are included in quote quantity?

In [5]:
trade_currency = {
    'USDT': 50,
    'ETH':  0.01539456264,
    'BTC': 0.00052979531,
}

In [294]:
client = Binance().client
balance = client.get_account()
balance = pd.DataFrame(balance['balances'])
balance['free'] = balance['free'].astype(float)
balance['locked'] = balance['locked'].astype(float)
balance = balance[balance['free'] > 0]
balance

Unnamed: 0,asset,free,locked
0,BTC,0.0983,0.0
2,ETH,1.8632,0.0
4,BNB,0.0,0.0
11,USDT,9209.6606,0.0
22,TRX,0.0821,0.0
61,XRP,2.6387,0.0
86,ADA,0.0734,0.0
88,XLM,99.9,0.0
130,TUSD,1.4657,0.0
134,IOTX,11.023,0.0
