In [21]:
# Import required libraries
# pandas: for data manipulation and analysis
# requests: for making HTTP requests to Binance API
# math: for mathematical operations
# Decimal: for precise decimal arithmetic
import pandas as pd
import numpy as np
import requests
import math
from decimal import Decimal

In [22]:
# Define the trading pair symbol and API endpoint
symbol = "SOLUSDT"  # Trading pair: Solana/USDT
url = "https://api.binance.com/api/v1/depth"  # Binance orderbook endpoint

# Parameters for the API request
# limit=1000 gets the top 1000 bids and asks
params = {
    "symbol": symbol,
    "limit": 1000,
}

In [23]:
data = requests.get(url, params).json()
data

{'lastUpdateId': 25386613862,
 'bids': [['194.42000000', '539.32700000'],
  ['194.41000000', '320.63500000'],
  ['194.40000000', '256.19200000'],
  ['194.39000000', '274.25700000'],
  ['194.38000000', '70.18400000'],
  ['194.37000000', '56.48900000'],
  ['194.36000000', '116.34000000'],
  ['194.35000000', '218.35200000'],
  ['194.34000000', '162.87300000'],
  ['194.33000000', '285.90500000'],
  ['194.32000000', '287.22300000'],
  ['194.31000000', '241.70500000'],
  ['194.30000000', '145.27000000'],
  ['194.29000000', '414.37200000'],
  ['194.28000000', '592.43900000'],
  ['194.27000000', '130.25700000'],
  ['194.26000000', '265.48900000'],
  ['194.25000000', '318.47600000'],
  ['194.24000000', '259.70600000'],
  ['194.23000000', '107.83900000'],
  ['194.22000000', '288.05100000'],
  ['194.21000000', '289.35200000'],
  ['194.20000000', '149.75000000'],
  ['194.19000000', '192.55200000'],
  ['194.18000000', '562.52300000'],
  ['194.17000000', '440.96600000'],
  ['194.16000000', '498.1250

In [24]:
factor_price = data['bids'][0][0]
precision_price = len(factor_price.split('.')[-1])
factor_price = 10 ** precision_price

# Set the interval for price level aggregation (0.1 USDT)
interval = Decimal('0.1')
interval = int(interval * factor_price)

In [25]:
factor_quantity = data['bids'][0][1]
precision_quantity = len(factor_quantity.split('.')[-1])
factor_quantity = 10 ** precision_quantity

In [26]:
bids = data['bids']
bids = [
    [
        int(Decimal(x[0]) * factor_price), int(Decimal(x[1]) * factor_quantity)
    ]
    for x in bids
]
print("bids:", bids)

asks = data['asks']
asks = [
    [
        int(Decimal(x[0]) * factor_price), int(Decimal(x[1]) * factor_quantity)
    ]
    for x in asks
]
print("asks:", asks)

bids: [[19442000000, 53932700000], [19441000000, 32063500000], [19440000000, 25619200000], [19439000000, 27425700000], [19438000000, 7018400000], [19437000000, 5648900000], [19436000000, 11634000000], [19435000000, 21835200000], [19434000000, 16287300000], [19433000000, 28590500000], [19432000000, 28722300000], [19431000000, 24170500000], [19430000000, 14527000000], [19429000000, 41437200000], [19428000000, 59243900000], [19427000000, 13025700000], [19426000000, 26548900000], [19425000000, 31847600000], [19424000000, 25970600000], [19423000000, 10783900000], [19422000000, 28805100000], [19421000000, 28935200000], [19420000000, 14975000000], [19419000000, 19255200000], [19418000000, 56252300000], [19417000000, 44096600000], [19416000000, 49812500000], [19415000000, 14245500000], [19414000000, 20039600000], [19413000000, 48836800000], [19412000000, 14363100000], [19411000000, 11529500000], [19410000000, 20433400000], [19409000000, 56585800000], [19408000000, 51662800000], [19407000000, 1

## Aggregating Bids Levels

In [27]:
# Create a DataFrame for bid orders
# Convert the bids data from the API response into a structured DataFrame
# Each row contains: price level and quantity available at that price
bids_levels = pd.DataFrame(bids, columns=['price', 'quantity'], dtype=np.int64)
bids_levels['side'] = 'bid'  # Add a column to identify this as bid orders
print(bids_levels)
print(bids_levels.dtypes)  # Display data types of each column

           price     quantity side
0    19442000000  53932700000  bid
1    19441000000  32063500000  bid
2    19440000000  25619200000  bid
3    19439000000  27425700000  bid
4    19438000000   7018400000  bid
..           ...          ...  ...
995  18447000000    436800000  bid
996  18446000000    371600000  bid
997  18445000000    722300000  bid
998  18444000000     79200000  bid
999  18443000000   1153300000  bid

[1000 rows x 3 columns]
price        int64
quantity     int64
side        object
dtype: object


In [28]:
# Calculate the minimum bid price level
# This rounds down the minimum bid price to the nearest interval (0.1 USDT)
min_bid_level = math.floor(min(bids_levels['price']) / interval) * interval
min_bid_level

18440000000

In [29]:
# Calculate the maximum bid price level
# Round up to the next interval and add one more interval for binning
max_bid_level = (math.ceil(max(bids_levels['price']) / interval) + 1) * interval
max_bid_level

19460000000

In [30]:
# Create an array of price levels for binning
# Generate price boundaries from min to max bid level with the specified interval
bid_levels_bounds = [(min_bid_level + interval * i)
    for i in range(int((max_bid_level - min_bid_level) / interval) + 1)
]
bid_levels_bounds

[18440000000,
 18450000000,
 18460000000,
 18470000000,
 18480000000,
 18490000000,
 18500000000,
 18510000000,
 18520000000,
 18530000000,
 18540000000,
 18550000000,
 18560000000,
 18570000000,
 18580000000,
 18590000000,
 18600000000,
 18610000000,
 18620000000,
 18630000000,
 18640000000,
 18650000000,
 18660000000,
 18670000000,
 18680000000,
 18690000000,
 18700000000,
 18710000000,
 18720000000,
 18730000000,
 18740000000,
 18750000000,
 18760000000,
 18770000000,
 18780000000,
 18790000000,
 18800000000,
 18810000000,
 18820000000,
 18830000000,
 18840000000,
 18850000000,
 18860000000,
 18870000000,
 18880000000,
 18890000000,
 18900000000,
 18910000000,
 18920000000,
 18930000000,
 18940000000,
 18950000000,
 18960000000,
 18970000000,
 18980000000,
 18990000000,
 19000000000,
 19010000000,
 19020000000,
 19030000000,
 19040000000,
 19050000000,
 19060000000,
 19070000000,
 19080000000,
 19090000000,
 19100000000,
 19110000000,
 19120000000,
 19130000000,
 19140000000,
 19150

In [31]:
# Assign each bid price to its corresponding price bin
# right=False means intervals are left-inclusive
# precision=10 ensures we don't lose decimal precision
bids_levels["bin"] = pd.cut(
    bids_levels['price'],
    bins=bid_levels_bounds,
    right=False,
    precision=10
)
bids_levels

Unnamed: 0,price,quantity,side,bin
0,19442000000,53932700000,bid,"[19440000000, 19450000000)"
1,19441000000,32063500000,bid,"[19440000000, 19450000000)"
2,19440000000,25619200000,bid,"[19440000000, 19450000000)"
3,19439000000,27425700000,bid,"[19430000000, 19440000000)"
4,19438000000,7018400000,bid,"[19430000000, 19440000000)"
...,...,...,...,...
995,18447000000,436800000,bid,"[18440000000, 18450000000)"
996,18446000000,371600000,bid,"[18440000000, 18450000000)"
997,18445000000,722300000,bid,"[18440000000, 18450000000)"
998,18444000000,79200000,bid,"[18440000000, 18450000000)"


In [32]:
# Group the bids by their price bins and aggregate the data
# Sum up quantities for each price level
# Keep the 'side' information (all 'bid' in this case)
# Reset index to make the bin column a regular column
bids_levels = bids_levels.groupby("bin").agg(
    quantity = ("quantity", "sum"),
    side = ("side", "first")
).reset_index()

# Extract the lower bound of each bin as the price level label
bids_levels['label'] = bids_levels['bin'].apply(lambda x: x.left)
bids_levels

  bids_levels = bids_levels.groupby("bin").agg(


Unnamed: 0,bin,quantity,side,label
0,"[18440000000, 18450000000)",3231300000,bid,18440000000
1,"[18450000000, 18460000000)",42806500000,bid,18450000000
2,"[18460000000, 18470000000)",21810700000,bid,18460000000
3,"[18470000000, 18480000000)",8515100000,bid,18470000000
4,"[18480000000, 18490000000)",29460000000,bid,18480000000
...,...,...,...,...
97,"[19410000000, 19420000000)",298864500000,bid,19410000000
98,"[19420000000, 19430000000)",281573100000,bid,19420000000
99,"[19430000000, 19440000000)",185859800000,bid,19430000000
100,"[19440000000, 19450000000)",111615400000,bid,19440000000


## Aggregating Asks Levels

In [33]:
# Create a DataFrame for ask orders
# Convert the asks data from the API response into a structured DataFrame
# Each row contains: price level and quantity available at that price
asks_levels = pd.DataFrame(asks, columns=['price', 'quantity'], dtype=np.int64)
asks_levels['side'] = 'ask'  # Add a column to identify this as ask orders
print(asks_levels)
print(asks_levels.dtypes)  # Display data types of each column

           price     quantity side
0    19443000000   5797700000  ask
1    19444000000   5179500000  ask
2    19445000000   9516000000  ask
3    19446000000  35581300000  ask
4    19447000000  52119500000  ask
..           ...          ...  ...
995  20438000000    299100000  ask
996  20439000000    326100000  ask
997  20440000000  32451200000  ask
998  20441000000    173000000  ask
999  20442000000    415600000  ask

[1000 rows x 3 columns]
price        int64
quantity     int64
side        object
dtype: object


In [34]:
# Calculate minimum ask level
# Round down to nearest interval and subtract one interval to ensure all asks are included
min_ask_level = (math.floor(min(asks_levels['price']) / interval) - 1) * interval
min_ask_level

19430000000

In [35]:
# Calculate maximum ask level
# Round up to next interval and add one more interval for complete binning
max_ask_level = (math.ceil(max(asks_levels['price']) / interval) + 1) * interval
max_ask_level

20460000000

In [36]:
# Create an array of price levels for ask binning
# Generate price boundaries from min to max ask level with the specified interval
ask_levels_bounds =[(min_ask_level + interval * i)
    for i in range(int((max_ask_level - min_ask_level) / interval) + 1)
]
ask_levels_bounds

[19430000000,
 19440000000,
 19450000000,
 19460000000,
 19470000000,
 19480000000,
 19490000000,
 19500000000,
 19510000000,
 19520000000,
 19530000000,
 19540000000,
 19550000000,
 19560000000,
 19570000000,
 19580000000,
 19590000000,
 19600000000,
 19610000000,
 19620000000,
 19630000000,
 19640000000,
 19650000000,
 19660000000,
 19670000000,
 19680000000,
 19690000000,
 19700000000,
 19710000000,
 19720000000,
 19730000000,
 19740000000,
 19750000000,
 19760000000,
 19770000000,
 19780000000,
 19790000000,
 19800000000,
 19810000000,
 19820000000,
 19830000000,
 19840000000,
 19850000000,
 19860000000,
 19870000000,
 19880000000,
 19890000000,
 19900000000,
 19910000000,
 19920000000,
 19930000000,
 19940000000,
 19950000000,
 19960000000,
 19970000000,
 19980000000,
 19990000000,
 20000000000,
 20010000000,
 20020000000,
 20030000000,
 20040000000,
 20050000000,
 20060000000,
 20070000000,
 20080000000,
 20090000000,
 20100000000,
 20110000000,
 20120000000,
 20130000000,
 20140

In [37]:
# Assign each ask price to its corresponding price bin
# right=True means intervals are right-inclusive (opposite of bids)
# precision=10 ensures we don't lose decimal precision
asks_levels["bin"] = pd.cut(
    asks_levels['price'],
    bins=ask_levels_bounds,
    right=True,
    precision=10
)
asks_levels

Unnamed: 0,price,quantity,side,bin
0,19443000000,5797700000,ask,"(19440000000, 19450000000]"
1,19444000000,5179500000,ask,"(19440000000, 19450000000]"
2,19445000000,9516000000,ask,"(19440000000, 19450000000]"
3,19446000000,35581300000,ask,"(19440000000, 19450000000]"
4,19447000000,52119500000,ask,"(19440000000, 19450000000]"
...,...,...,...,...
995,20438000000,299100000,ask,"(20430000000, 20440000000]"
996,20439000000,326100000,ask,"(20430000000, 20440000000]"
997,20440000000,32451200000,ask,"(20430000000, 20440000000]"
998,20441000000,173000000,ask,"(20440000000, 20450000000]"


In [38]:
# Group the asks by their price bins and aggregate the data
# Similar to bids processing:
# - Sum quantities for each price level
# - Keep the 'side' information
# - Reset index for regular column access
asks_levels = asks_levels.groupby("bin").agg(
    quantity = ("quantity", "sum"),
    side = ("side", "first")
).reset_index()

# Extract the lower bound of each bin as the price level label
asks_levels['label'] = asks_levels['bin'].apply(lambda x: x.left)
asks_levels

  asks_levels = asks_levels.groupby("bin").agg(


Unnamed: 0,bin,quantity,side,label
0,"(19430000000, 19440000000]",0,,19430000000
1,"(19440000000, 19450000000]",201842700000,ask,19440000000
2,"(19450000000, 19460000000]",380418800000,ask,19450000000
3,"(19460000000, 19470000000]",347310700000,ask,19460000000
4,"(19470000000, 19480000000]",246117800000,ask,19470000000
...,...,...,...,...
98,"(20410000000, 20420000000]",55075500000,ask,20410000000
99,"(20420000000, 20430000000]",49244700000,ask,20420000000
100,"(20430000000, 20440000000]",54620800000,ask,20430000000
101,"(20440000000, 20450000000]",588600000,ask,20440000000


## Concatening both bid and ask in orderbook

In [39]:
# Combine both bid and ask levels into a single orderbook DataFrame
# 1. Concatenate asks and bids
# 2. Remove any price levels with zero quantity
# 3. Sort by price (label) in descending order to show highest price first
orderbook = pd.concat([asks_levels, bids_levels])
orderbook = orderbook[orderbook['quantity'] > 0]
orderbook = orderbook.sort_values(by='label', ascending=False)
print(orderbook.to_string())

                            bin       quantity side        label
101  (20440000000, 20450000000]      588600000  ask  20440000000
100  (20430000000, 20440000000]    54620800000  ask  20430000000
99   (20420000000, 20430000000]    49244700000  ask  20420000000
98   (20410000000, 20420000000]    55075500000  ask  20410000000
97   (20400000000, 20410000000]    29076800000  ask  20400000000
96   (20390000000, 20400000000]   482241200000  ask  20390000000
95   (20380000000, 20390000000]   897828800000  ask  20380000000
94   (20370000000, 20380000000]    10322300000  ask  20370000000
93   (20360000000, 20370000000]   222719000000  ask  20360000000
92   (20350000000, 20360000000]   730858200000  ask  20350000000
91   (20340000000, 20350000000]    39169100000  ask  20340000000
90   (20330000000, 20340000000]    37900800000  ask  20330000000
89   (20320000000, 20330000000]    33178800000  ask  20320000000
88   (20310000000, 20320000000]    72072100000  ask  20310000000
87   (20300000000, 203100

In [40]:
orderbook['quantity'] /= factor_quantity
orderbook['label'] /= factor_price
orderbook

Unnamed: 0,bin,quantity,side,label
101,"(20440000000, 20450000000]",5.886,ask,204.4
100,"(20430000000, 20440000000]",546.208,ask,204.3
99,"(20420000000, 20430000000]",492.447,ask,204.2
98,"(20410000000, 20420000000]",550.755,ask,204.1
97,"(20400000000, 20410000000]",290.768,ask,204.0
...,...,...,...,...
4,"[18480000000, 18490000000)",294.600,bid,184.8
3,"[18470000000, 18480000000)",85.151,bid,184.7
2,"[18460000000, 18470000000)",218.107,bid,184.6
1,"[18450000000, 18460000000)",428.065,bid,184.5
