pip install python-binance


In [3]:
!pip install python-binance




In [1]:
from binance import Client
import pandas as pd
import numpy as np

# Initialize client for public data
spot_client = Client()

# Fetch trades for BTCEUR
r = spot_client.get_recent_trades(symbol="BTCEUR")


In [2]:
df = (
    pd.DataFrame(r)
    .drop(columns=["isBuyerMaker", "isBestMatch"])
)
df["time"] = pd.to_datetime(df["time"], unit="ms")

for column in ["price", "qty", "quoteQty"]:
    df[column] = pd.to_numeric(df[column])
df

Unnamed: 0,id,price,qty,quoteQty,time
0,129860598,35679.35,0.00367,130.943215,2023-12-02 12:28:32.804
1,129860599,35679.39,0.02000,713.587800,2023-12-02 12:28:32.804
2,129860600,35679.40,0.04544,1621.271936,2023-12-02 12:28:32.804
3,129860601,35679.40,0.00470,167.693180,2023-12-02 12:28:32.804
4,129860602,35678.35,0.00084,29.969814,2023-12-02 12:28:41.227
...,...,...,...,...,...
495,129861093,35669.61,0.00018,6.420530,2023-12-02 12:59:39.893
496,129861094,35669.58,0.00242,86.320384,2023-12-02 12:59:39.893
497,129861095,35669.57,0.00036,12.841045,2023-12-02 12:59:39.893
498,129861096,35669.19,0.00242,86.319440,2023-12-02 12:59:42.593


In [9]:
# The DataFrame df is first grouped by a time interval (e.g., 1 minute) using pd.Grouper.
grouped = df.groupby(pd.Grouper(key='time', freq='1Min'))  # Group by 1-minute intervals, for example

# This approach is typical in financial time series analysis where you want to summarize price data over specific intervals, like minutes, hours, or days.
# Display the first few rows of each group
for name, group in grouped:
    print(f"Group: {name}")
    print(group.head())  # Adjust the number of rows to display as needed
    print("\n")  # Adds a newline for better readability

Group: 2023-12-02 12:28:00
          id     price      qty     quoteQty                    time
0  129860598  35679.35  0.00367   130.943215 2023-12-02 12:28:32.804
1  129860599  35679.39  0.02000   713.587800 2023-12-02 12:28:32.804
2  129860600  35679.40  0.04544  1621.271936 2023-12-02 12:28:32.804
3  129860601  35679.40  0.00470   167.693180 2023-12-02 12:28:32.804
4  129860602  35678.35  0.00084    29.969814 2023-12-02 12:28:41.227


Group: 2023-12-02 12:29:00
           id     price      qty    quoteQty                    time
11  129860609  35682.02  0.00108   38.536582 2023-12-02 12:29:06.867
12  129860610  35682.42  0.01030  367.528926 2023-12-02 12:29:15.092
13  129860611  35681.14  0.01627  580.532148 2023-12-02 12:29:19.621
14  129860612  35678.69  0.00403  143.785121 2023-12-02 12:29:28.392
15  129860613  35678.69  0.00207   73.854888 2023-12-02 12:29:28.992


Group: 2023-12-02 12:30:00
           id     price      qty    quoteQty                    time
19  129860617  356

In [15]:
# Accessing the first group
first_group_name, first_group_df = next(iter(grouped))
first_group_df

Unnamed: 0,id,price,qty,quoteQty,time
0,129860598,35679.35,0.00367,130.943215,2023-12-02 12:28:32.804
1,129860599,35679.39,0.02,713.5878,2023-12-02 12:28:32.804
2,129860600,35679.4,0.04544,1621.271936,2023-12-02 12:28:32.804
3,129860601,35679.4,0.0047,167.69318,2023-12-02 12:28:32.804
4,129860602,35678.35,0.00084,29.969814,2023-12-02 12:28:41.227
5,129860603,35681.21,0.00172,61.371681,2023-12-02 12:28:47.199
6,129860604,35681.21,0.00328,117.034369,2023-12-02 12:28:47.231
7,129860605,35683.1,0.00171,61.018101,2023-12-02 12:28:47.231
8,129860606,35683.1,0.00147,52.454157,2023-12-02 12:28:47.243
9,129860607,35683.11,0.00032,11.418595,2023-12-02 12:28:47.243


Some further notes
* iter(grouped) creates an iterator over the groups in the grouped object.

* next() fetches the first item from this iterator, which is a tuple containing the group name (or key) and the corresponding DataFrame.

To access a specific group, you typically use methods like:

* get_group(key): If you know the key (or name) of the group you want to access.
* Iterating over the GroupBy object: This allows you to access each group in turn. As we have done earlier in the code!

In [17]:
# Access a group by its key
specific_key="2023-12-02 12:30:00" # It should bare the name of the date and time in this specific case
specific_group = grouped.get_group(specific_key)
specific_group

Unnamed: 0,id,price,qty,quoteQty,time
19,129860617,35677.23,0.01681,599.734236,2023-12-02 12:30:12.123
20,129860618,35677.65,0.0117,417.428505,2023-12-02 12:30:12.123
21,129860619,35675.33,0.00159,56.723775,2023-12-02 12:30:19.313
22,129860620,35678.99,0.005,178.39495,2023-12-02 12:30:22.700
23,129860621,35680.61,0.00308,109.896279,2023-12-02 12:30:22.700
24,129860622,35680.79,0.01389,495.606173,2023-12-02 12:30:22.700
25,129860623,35675.33,0.0028,99.890924,2023-12-02 12:30:28.211
26,129860624,35677.05,0.0007,24.973935,2023-12-02 12:30:33.670
27,129860625,35676.76,0.00344,122.728054,2023-12-02 12:30:42.121
28,129860626,35679.38,0.00045,16.055721,2023-12-02 12:30:45.309


In [10]:
# Then, the .ohlc() method is called on the 'price' column of this grouped object to compute the Open, High, Low, and Close values for each time interval.
ohlc = grouped['price'].ohlc()
ohlc

Unnamed: 0_level_0,open,high,low,close
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-12-02 12:28:00,35679.35,35684.11,35678.35,35684.11
2023-12-02 12:29:00,35682.02,35682.42,35673.0,35673.0
2023-12-02 12:30:00,35677.23,35681.61,35675.33,35679.02
2023-12-02 12:31:00,35677.4,35680.8,35675.18,35675.95
2023-12-02 12:32:00,35673.84,35675.47,35659.89,35660.78
2023-12-02 12:33:00,35659.7,35667.17,35659.7,35667.17
2023-12-02 12:34:00,35666.9,35667.14,35665.13,35665.28
2023-12-02 12:35:00,35665.94,35666.06,35661.65,35664.52
2023-12-02 12:36:00,35665.68,35668.68,35663.59,35668.68
2023-12-02 12:37:00,35666.21,35672.39,35665.86,35667.75


In [23]:
# The grouped (the variable) is a grouped DataFrame and each group contains 'price' and 'qty' columns
vwap = grouped.apply(lambda x: np.average(x['price'], weights=x['qty'])).to_frame("vwap")
vwap.head()

Unnamed: 0_level_0,vwap
time,Unnamed: 1_level_1
2023-12-02 12:28:00,35679.776564
2023-12-02 12:29:00,35679.505732
2023-12-02 12:30:00,35679.416904
2023-12-02 12:31:00,35676.654793
2023-12-02 12:32:00,35668.430774


In [29]:
# Sum the volume in each group
vol = grouped["qty"].sum().to_frame("Volume")
vol.head()

Unnamed: 0_level_0,Volume
time,Unnamed: 1_level_1
2023-12-02 12:28:00,0.08559
2023-12-02 12:29:00,0.05597
2023-12-02 12:30:00,0.09266
2023-12-02 12:31:00,0.10912
2023-12-02 12:32:00,0.0367


In [28]:
# how many data entry in each minute!
cnt =grouped["qty"].size().to_frame("count")
cnt

Unnamed: 0_level_0,count
time,Unnamed: 1_level_1
2023-12-02 12:28:00,11
2023-12-02 12:29:00,8
2023-12-02 12:30:00,14
2023-12-02 12:31:00,20
2023-12-02 12:32:00,15
2023-12-02 12:33:00,15
2023-12-02 12:34:00,10
2023-12-02 12:35:00,27
2023-12-02 12:36:00,10
2023-12-02 12:37:00,14


In [30]:
import numpy as np
import pandas as pd

def get_bars(df, add_time=False):
    """
    Aggregates trading data into a summarized format including OHLC, VWAP, volume, and count.

    Args:
        df (DataFrame): A DataFrame containing trade data with 'price', 'qty', and optionally 'time' columns.
        add_time (bool, optional): If True, includes the timestamp of the last trade in the aggregation. Defaults to False.

    Returns:
        DataFrame: A DataFrame containing aggregated trade data.
    """

    # Calculate Open, High, Low, and Close (OHLC) for the 'price' column
    ohlc = df["price"].ohlc()

    # Calculate Volume Weighted Average Price (VWAP)
    # VWAP is calculated as the sum of price multiplied by quantity, divided by total quantity
    vwap = (
        df.apply(lambda x: np.average(x["price"], weights=x["qty"]))
        .to_frame("vwap")
    )

    # Calculate total volume of trades
    vol = df["qty"].sum().to_frame("Volume")

    # Count the number of trades
    cnt = df["qty"].size().to_frame("Count")
    
    # If add_time is True, include the timestamp of the last trade
    if add_time:
        # Get the last timestamp from the 'time' column
        time = df["time"].last().to_frame("time")

        # Concatenate time, OHLC, VWAP, volume, and count into a single DataFrame
        res = pd.concat([time, ohlc, vwap, vol, cnt], axis=1)
    else:
        # Concatenate OHLC, VWAP, volume, and count into a single DataFrame
        res = pd.concat([ohlc, vwap, vol, cnt], axis=1)

    return res


In [32]:
df_grouped_time = df.groupby(pd.Grouper(key="time", freq="1Min"))
time_bars = get_bars(df_grouped_time)
time_bars.head()

Unnamed: 0_level_0,open,high,low,close,vwap,Volume,Count
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-12-02 12:28:00,35679.35,35684.11,35678.35,35684.11,35679.776564,0.08559,11
2023-12-02 12:29:00,35682.02,35682.42,35673.0,35673.0,35679.505732,0.05597,8
2023-12-02 12:30:00,35677.23,35681.61,35675.33,35679.02,35679.416904,0.09266,14
2023-12-02 12:31:00,35677.4,35680.8,35675.18,35675.95,35676.654793,0.10912,20
2023-12-02 12:32:00,35673.84,35675.47,35659.89,35660.78,35668.430774,0.0367,15


In [34]:
df.head()

Unnamed: 0,id,price,qty,quoteQty,time,cum_qty,vol_group
0,129860598,35679.35,0.00367,130.943215,2023-12-02 12:28:32.804,0.00367,0
1,129860599,35679.39,0.02,713.5878,2023-12-02 12:28:32.804,0.02367,0
2,129860600,35679.4,0.04544,1621.271936,2023-12-02 12:28:32.804,0.06911,0
3,129860601,35679.4,0.0047,167.69318,2023-12-02 12:28:32.804,0.07381,0
4,129860602,35678.35,0.00084,29.969814,2023-12-02 12:28:41.227,0.07465,0


In [35]:
# Set the size for each volume bar
bar_size = 1 

# Calculate the cumulative quantity of trades
df["cum_qty"] = df["qty"].cumsum()

# Create volume groups by dividing the cumulative quantity by the bar size
# and applying floor division to get integer group numbers
df["vol_group"] = (
    df["cum_qty"]
    .div(bar_size)  # Divide by bar_size
    .apply(np.floor)  # Round down to the nearest integer
    .astype(int)  # Convert to integer
)

# Group the DataFrame by the volume groups
df_grouped_ticks = df.groupby("vol_group")

df_grouped_ticks.head()


Unnamed: 0,id,price,qty,quoteQty,time,cum_qty,vol_group
0,129860598,35679.35,0.00367,130.943215,2023-12-02 12:28:32.804,0.00367,0
1,129860599,35679.39,0.02,713.5878,2023-12-02 12:28:32.804,0.02367,0
2,129860600,35679.4,0.04544,1621.271936,2023-12-02 12:28:32.804,0.06911,0
3,129860601,35679.4,0.0047,167.69318,2023-12-02 12:28:32.804,0.07381,0
4,129860602,35678.35,0.00084,29.969814,2023-12-02 12:28:41.227,0.07465,0
179,129860777,35656.95,0.02,713.139,2023-12-02 12:40:14.404,1.01102,1
180,129860778,35656.95,9e-05,3.209125,2023-12-02 12:40:14.404,1.01111,1
181,129860779,35652.97,0.00102,36.366029,2023-12-02 12:40:17.909,1.01213,1
182,129860780,35652.98,0.02521,898.811626,2023-12-02 12:40:18.021,1.03734,1
183,129860781,35649.75,0.00449,160.067377,2023-12-02 12:40:19.310,1.04183,1


In [36]:
# Apply the get_bars function to each volume group to create volume bars
# add_time=True includes the timestamp of the last trade in each bar
volume_bars = get_bars(df_grouped_ticks, add_time=True)

# Display the first few rows of the volume bars
volume_bars.head()

Unnamed: 0_level_0,time,open,high,low,close,vwap,Volume,Count
vol_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2023-12-02 12:40:14.404,35679.35,35684.11,35656.94,35656.95,35670.380014,0.99102,179
1,2023-12-02 12:53:39.513,35656.95,35660.0,35636.21,35656.98,35649.504605,1.00715,212
2,2023-12-02 12:59:42.593,35656.98,35669.61,35651.25,35668.99,35657.530785,0.42582,109
