In [1]:
# adhoc to get latest CCXT version.
!sudo /bin/bash -c "(source /venv/bin/activate; pip install --upgrade ccxt)"

Requirement already up-to-date: ccxt in /venv/lib/python3.9/site-packages (4.2.49)


In [2]:
import ccxt
import pandas as pd

import helpers.hdatetime as hdateti
import im_v2.common.data.client.im_raw_data_client as imvcdcimrdc
import im_v2.common.universe.universe as imvcounun

  from tqdm.autonotebook import tqdm


In [3]:
ccxt.__version__

'4.2.49'

In [4]:
ccxt_exchange = ccxt.binance()

In [5]:
def get_ohlcv_data(symbol, start_time, end_time):
    since = hdateti.convert_timestamp_to_unix_epoch(pd.Timestamp(start_time))
    end = hdateti.convert_timestamp_to_unix_epoch(pd.Timestamp(end_time))
    limit = int((end - since) / 60000) + 1
    ohlcv = pd.DataFrame(
        ccxt_exchange.fetch_ohlcv(symbol, "1m", since=since, limit=limit)
    )
    ohlcv.columns = ["timestamp", "open", "high", "low", "close", "volume"]
    return ohlcv

In [137]:
def get_trades_data(symbol, since):
    #     since = hdateti.convert_timestamp_to_unix_epoch(pd.Timestamp(start_time))
    trades = []
    last_data_id = None
    while True:
        if last_data_id is None:
            # Start from beginning, get the data from the start timestamp.
            data = ccxt_exchange.fetch_trades(
                symbol,
                since=since,
                limit=1000,
            )
        else:
            params = {"fromId": last_data_id}
            data = ccxt_exchange.fetch_trades(symbol, limit=1000, params=params)
        last_data_id = int(data[-1]["id"]) + 1
        trades.extend(data)
        if data[-1]["timestamp"] > since + 60000:
            break
    return trades

In [276]:
def get_error(REST_ohlcv, fil_data):
    merged_df = pd.merge(REST_ohlcv, fil_data, on="timestamp")
    merged_df["open_error"] = (
        100 * abs(merged_df["open_x"] - merged_df["open_y"]) / merged_df["open_y"]
    )
    merged_df["high_error"] = (
        100 * abs(merged_df["high_x"] - merged_df["high_y"]) / merged_df["high_y"]
    )
    merged_df["low_error"] = (
        100 * abs(merged_df["low_x"] - merged_df["low_y"]) / merged_df["low_y"]
    )
    merged_df["close_error"] = (
        100
        * abs(merged_df["close_x"] - merged_df["close_y"])
        / merged_df["close_y"]
    )
    merged_df["volume_error"] = (
        100
        * abs(merged_df["volume_x"] - merged_df["volume_y"])
        / merged_df["volume_y"]
    )
    print("Avg Open error  :", merged_df["open_error"].mean().round(2))
    print("Avg Close error :", merged_df["close_error"].mean().round(2))
    print("Avg High error  :", merged_df["high_error"].mean().round(2))
    print("Avg Low error   :", merged_df["low_error"].mean().round(2))
    print("Avg Volume error:", merged_df["volume_error"].mean().round(2))
    diff = merged_df[abs(merged_df["high_x"] - merged_df["high_y"]) > 1e-2]
    print(diff)
    data = fil_data[fil_data["timestamp"].isin(diff["timestamp"])]
    print(data)
    return diff, data


#     import pdb;pdb.set_trace()

In [277]:
def ohlcv_cross_data_qa(
    start_time,
    end_time,
    symbol,
    *,
    signature="realtime.airflow.downloaded_1min.postgres.ohlcv.futures.v7_4.ccxt.binance.v1_0_0",
    stage="test",
):
    data_reader = imvcdcimrdc.RawDataReader(signature, stage=stage)
    ohlcv_trades_data = data_reader.read_data(
        pd.Timestamp(start_time), pd.Timestamp(end_time)
    )
    filtered_data = ohlcv_trades_data[
        ohlcv_trades_data["currency_pair"] == symbol
    ]
    print(
        (
            filtered_data["knowledge_timestamp"]
            - filtered_data["end_download_timestamp"]
        ).mean()
    )
    symbol_rest = symbol.replace("_", "/") + ":USDT"
    REST_ohlcv = get_ohlcv_data(symbol_rest, start_time, end_time)
    print("Error % for symbol:", symbol)
    #     import pdb;pdb.set_trace()
    print(filtered_data)
    #     import pdb;pdb.set_trace()
    return get_error(REST_ohlcv, filtered_data)

In [278]:
vendor_name = "CCXT"
mode = "download"
version = "v7.3"
universe = imvcounun.get_vendor_universe(vendor_name, mode, version=version)
universe_list = universe["binance"]
universe_list = ["ETH_USDT"]
len(universe_list)

1

In [279]:
start_time = "2024-03-05T10:00:00+00:00"
end_time = "2024-03-05T13:10:00+00:00"
for symbol in universe_list:
    diff, data = ohlcv_cross_data_qa(start_time, end_time, symbol)

  df = pd.read_sql_query(query, connection)


0 days 00:00:00.137496879
Error % for symbol: ETH_USDT
          id      timestamp     open     high      low    close     volume  \
0   54900187  1709638620000  3733.89  3733.89  3731.38  3732.54   1076.443   
1   54900188  1709638680000  3732.54  3734.87  3731.00  3734.40   1371.746   
2   54900189  1709638740000  3734.40  3737.00  3733.75  3736.63   1390.271   
3   54900190  1709638800000  3736.63  3736.83  3732.21  3734.49   2034.620   
4   54900191  1709638860000  3734.49  3737.86  3732.92  3732.93    946.462   
5   54900192  1709638920000  3732.92  3736.22  3732.50  3734.91    973.450   
6   54900193  1709638980000  3734.90  3737.70  3734.31  3736.67   1082.863   
7   54900194  1709639040000  3736.68  3737.48  3732.82  3732.83   1693.116   
8   54900195  1709639100000  3732.82  3738.73  3731.00  3738.73   2437.276   
9   54900196  1709639160000  3738.72  3743.44  3737.23  3743.26   3590.652   
10  54900197  1709639220000  3743.25  3745.74  3740.59  3745.74   2060.703   
11  54900

In [60]:
diff[
    [
        "timestamp",
        "open_x",
        "high_x",
        "low_x",
        "close_x",
        "volume_x",
        "open_y",
        "high_y",
        "low_y",
        "close_y",
        "volume_y",
    ]
].tail(10)

Unnamed: 0,timestamp,open_x,high_x,low_x,close_x,volume_x,open_y,high_y,low_y,close_y,volume_y
364,1708975320000,3160.76,3171.1,3160.26,3166.01,6895.6,3160.76,3171.1,3159.69,3166.01,6910.636
373,1708975860000,3172.45,3184.31,3171.97,3175.59,22162.776,3172.45,3226.87,3171.97,3175.59,22162.837
407,1708977960000,3196.38,3199.0,3193.41,3197.42,9839.71,3196.38,3199.0,3192.26,3197.42,9839.748
503,1708983780000,3189.32,3191.16,3189.32,3190.36,374.4,3189.32,3362.43,3189.32,3190.36,375.4
740,1708998240000,3191.17,3198.95,3190.39,3195.69,13163.738,3191.17,3198.95,3176.43,3195.69,13164.719
744,1708999560000,3229.74,3254.12,3226.24,3250.8,37318.839,3229.74,3274.2,3205.17,3250.8,37352.693
745,1708999620000,3250.8,3268.1,3242.85,3261.63,38242.711,3250.8,3325.71,3206.89,3261.63,38386.637
746,1709003160000,3243.45,3244.6,3240.75,3242.99,1504.349,3243.45,3244.6,0.0,3242.99,1504.36
747,1709003220000,3243.0,3243.33,3239.98,3241.42,1675.083,3243.0,3243.33,3199.68,3241.42,1675.483
750,1709003400000,3243.81,3244.8,3242.61,3242.61,1026.923,3243.81,3244.8,3239.74,3242.61,1027.013


In [52]:
data

Unnamed: 0,id,timestamp,open,high,low,close,volume,currency_pair,exchange_id,end_download_timestamp,knowledge_timestamp
4570,54867416,1708963920000,53425.0,54093.25,53425.0,53633.0,5369.103,BTC_USDT,binance,2024-02-26 16:13:01.763486+00:00,2024-02-26 16:13:03.387947+00:00
10291,54873137,1708977960000,54838.4,55126.63,54763.2,54916.7,2721.079,BTC_USDT,binance,2024-02-26 20:07:05.947698+00:00,2024-02-26 20:07:06.915714+00:00
20442,54883288,1709003160000,56543.8,56553.5,35778.96,56511.0,365.982,BTC_USDT,binance,2024-02-27 03:07:01.351579+00:00,2024-02-27 03:07:01.906345+00:00
21616,54884462,1709005980000,56013.1,56017.7,55814.0,55876.1,2521.857,BTC_USDT,binance,2024-02-27 03:54:02.196273+00:00,2024-02-27 03:54:04.912489+00:00


In [266]:
trades = get_trades_data("ETH/USDT:USDT", 1709480580000)

In [267]:
len(trades)

1000

In [272]:
trades[29]

{'info': {'a': '1520833751',
  'p': '3431.98',
  'q': '0.114',
  'f': '3691724447',
  'l': '3691724451',
  'T': '1709480589228',
  'm': True},
 'timestamp': 1709480589228,
 'datetime': '2024-03-03T15:43:09.228Z',
 'symbol': 'ETH/USDT:USDT',
 'id': '1520833751',
 'order': None,
 'type': None,
 'side': 'sell',
 'takerOrMaker': None,
 'price': 3431.98,
 'amount': 0.114,
 'cost': 391.24572,
 'fee': None,
 'fees': []}

In [268]:
ccxt_exchange.build_ohlcvc(trades)

[[1709480580000, 3431.98, 3432.13, 3430.68, 3431.93, 536.5779999999986, 304],
 [1709480640000, 3431.93, 3433.8, 3431.93, 3432.23, 1021.104999999999, 468],
 [1709480700000, 3432.24, 3432.72, 3431.47, 3432.13, 451.972, 228]]

In [84]:
import im_v2.ccxt.data.extract.extractor as imvcdexex

extractor = imvcdexex.CcxtExtractor("binance", "futures")

In [85]:
trades_df = extractor._fetch_trades(
    "ETH_USDT",
    start_timestamp="2024-02-29T11:31:00+00:00",
    end_timestamp="2024-02-29T11:33:00+00:00",
)

AttributeError: 'str' object has no attribute 'tz'

In [119]:
import pickle

trades = pickle.load(open("../../../../../../trades.pkl", "rb"))

In [121]:
len(trades)

909460665

In [122]:
ccxt_exchange.build_ohlcvc(trades, since=1709231580000)

KeyboardInterrupt: 

In [125]:
from tqdm import tqdm

my_trades = []
for trade in tqdm(trades):
    if (
        trade["timestamp"] >= 1709231580000
        and trade["timestamp"] <= 1709231580000 + 60100
    ):
        my_trades.append(trade)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 909460665/909460665 [06:19<00:00, 2399296.89it/s]


In [126]:
len(my_trades)

27006200

In [127]:
my_trades[0]

{'info': {'e': 'trade',
  'E': 1709231580003,
  'T': 1709231580003,
  's': 'ETHUSDT',
  't': 3681469088,
  'p': '3388.75',
  'q': '0.021',
  'X': 'MARKET',
  'm': True},
 'timestamp': 1709231580003,
 'datetime': '2024-02-29T18:33:00.003Z',
 'symbol': 'ETH/USDT:USDT',
 'id': '3681469088',
 'order': None,
 'type': None,
 'side': 'sell',
 'takerOrMaker': None,
 'price': 3388.75,
 'amount': 0.021,
 'cost': 71.16375,
 'fee': None,
 'fees': []}

In [128]:
df = pd.DataFrame(my_trades)

In [132]:
df = df[
    ["timestamp", "datetime", "symbol", "id", "side", "price", "amount", "cost"]
]
df = df.drop_duplicates()

In [133]:
len(df)

15246

In [134]:
trades_ts = df.to_dict(orient="records")

In [136]:
ccxt_exchange.build_ohlcvc(trades_ts)

[[1709231580000, 3388.75, 3388.76, 3368.06, 3376.2, 15710.068999999085, 15235],
 [1709231640000, 3376.2, 3376.2, 3376.2, 3376.2, 2.1529999999999996, 11]]

In [145]:
trades_ts[:20]

[{'timestamp': 1709231580003,
  'datetime': '2024-02-29T18:33:00.003Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3681469088',
  'side': 'sell',
  'price': 3388.75,
  'amount': 0.021,
  'cost': 71.16375},
 {'timestamp': 1709231580019,
  'datetime': '2024-02-29T18:33:00.019Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3681469089',
  'side': 'buy',
  'price': 3388.76,
  'amount': 12.01,
  'cost': 40699.0076},
 {'timestamp': 1709231580033,
  'datetime': '2024-02-29T18:33:00.033Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3681469090',
  'side': 'sell',
  'price': 3388.75,
  'amount': 0.017,
  'cost': 57.60875},
 {'timestamp': 1709231580037,
  'datetime': '2024-02-29T18:33:00.037Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3681469091',
  'side': 'buy',
  'price': 3388.76,
  'amount': 0.542,
  'cost': 1836.70792},
 {'timestamp': 1709231580037,
  'datetime': '2024-02-29T18:33:00.037Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3681469092',
  'side': 'buy',
  'price': 3388.76,
  'amount': 1.5,
  'cost': 5083.14

In [143]:
trades[:10]

[{'info': {'a': '1517803116',
   'p': '3388.76',
   'q': '12.010',
   'f': '3681469089',
   'l': '3681469089',
   'T': '1709231580019',
   'm': False},
  'timestamp': 1709231580019,
  'datetime': '2024-02-29T18:33:00.019Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '1517803116',
  'order': None,
  'type': None,
  'side': 'buy',
  'takerOrMaker': None,
  'price': 3388.76,
  'amount': 12.01,
  'cost': 40699.0076,
  'fee': None,
  'fees': []},
 {'info': {'a': '1517803117',
   'p': '3388.75',
   'q': '0.017',
   'f': '3681469090',
   'l': '3681469090',
   'T': '1709231580033',
   'm': True},
  'timestamp': 1709231580033,
  'datetime': '2024-02-29T18:33:00.033Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '1517803117',
  'order': None,
  'type': None,
  'side': 'sell',
  'takerOrMaker': None,
  'price': 3388.75,
  'amount': 0.017,
  'cost': 57.60875,
  'fee': None,
  'fees': []},
 {'info': {'a': '1517803118',
   'p': '3388.76',
   'q': '12.010',
   'f': '3681469091',
   'l': '3681469099',
   'T': '170

In [154]:
trades[-530:-510]

[{'info': {'a': '1517808586',
   'p': '3376.52',
   'q': '0.295',
   'f': '3681484250',
   'l': '3681484252',
   'T': '1709231639221',
   'm': False},
  'timestamp': 1709231639221,
  'datetime': '2024-02-29T18:33:59.221Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '1517808586',
  'order': None,
  'type': None,
  'side': 'buy',
  'takerOrMaker': None,
  'price': 3376.52,
  'amount': 0.295,
  'cost': 996.0734,
  'fee': None,
  'fees': []},
 {'info': {'a': '1517808587',
   'p': '3376.53',
   'q': '0.006',
   'f': '3681484253',
   'l': '3681484253',
   'T': '1709231639223',
   'm': False},
  'timestamp': 1709231639223,
  'datetime': '2024-02-29T18:33:59.223Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '1517808587',
  'order': None,
  'type': None,
  'side': 'buy',
  'takerOrMaker': None,
  'price': 3376.53,
  'amount': 0.006,
  'cost': 20.25918,
  'fee': None,
  'fees': []},
 {'info': {'a': '1517808588',
   'p': '3376.61',
   'q': '8.120',
   'f': '3681484254',
   'l': '3681484294',
   'T': '1709231

In [153]:
trades_ts[-32:-12]

[{'timestamp': 1709231639693,
  'datetime': '2024-02-29T18:33:59.693Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3681484302',
  'side': 'sell',
  'price': 3376.43,
  'amount': 0.616,
  'cost': 2079.88088},
 {'timestamp': 1709231639693,
  'datetime': '2024-02-29T18:33:59.693Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3681484303',
  'side': 'sell',
  'price': 3376.43,
  'amount': 0.937,
  'cost': 3163.71491},
 {'timestamp': 1709231639693,
  'datetime': '2024-02-29T18:33:59.693Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3681484304',
  'side': 'sell',
  'price': 3376.42,
  'amount': 1.0,
  'cost': 3376.42},
 {'timestamp': 1709231639693,
  'datetime': '2024-02-29T18:33:59.693Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3681484305',
  'side': 'sell',
  'price': 3376.42,
  'amount': 1.62,
  'cost': 5469.8004},
 {'timestamp': 1709231639693,
  'datetime': '2024-02-29T18:33:59.693Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3681484306',
  'side': 'sell',
  'price': 3376.39,
  'amount': 0.007,
  'cost': 23.63

In [172]:
import requests


def fetch_all_trades(symbol, start_time, end_time):
    limit = 1000  # Maximum number of trades per request
    all_trades = []  # List to store all trades

    while True:
        url = f"https://fapi.binance.com/fapi/v1/aggTrades?symbol={symbol}&startTime={start_time}&endTime={end_time}&limit={limit}"
        response = requests.get(url)

        if response.status_code == 200:
            trades_data = response.json()
            all_trades.extend(trades_data)

            # Check if all trades are fetched
            if len(trades_data) < limit:
                break  # Exit loop if fewer trades than limit are returned

            # Update start_time for next request
            start_time = trades_data[-1]["T"]

        else:
            print(
                f"Failed to fetch trades data. Status code: {response.status_code}"
            )
            return None

    all_trades = pd.DataFrame(all_trades)
    all_trades = all_trades.drop_duplicates()
    all_trades = all_trades.to_dict(orient="records")
    return all_trades

In [264]:
trades_api = fetch_all_trades("ETHUSDT", 1709480580000, 1709480580000 + 60000)

In [265]:
len(trades_api)

304

In [164]:
trades_api[-1]

{'a': 1517808606,
 'p': '3376.20',
 'q': '0.491',
 'f': 3681484322,
 'l': 3681484322,
 'T': 1709231639840,
 'm': False}

In [170]:
trades[5490]

{'info': {'a': '1517808606',
  'p': '3376.20',
  'q': '0.491',
  'f': '3681484322',
  'l': '3681484322',
  'T': '1709231639840',
  'm': False},
 'timestamp': 1709231639840,
 'datetime': '2024-02-29T18:33:59.840Z',
 'symbol': 'ETH/USDT:USDT',
 'id': '1517808606',
 'order': None,
 'type': None,
 'side': 'buy',
 'takerOrMaker': None,
 'price': 3376.2,
 'amount': 0.491,
 'cost': 1657.7142,
 'fee': None,
 'fees': []}

In [280]:
import pickle

trades_ccxt_ws = pickle.load(open("../../../../../../trades_ccxt.pkl", "rb"))

In [281]:
len(trades_ccxt_ws)

888170974

In [282]:
trades_ccxt_ws[0]

{'info': {'e': 'trade',
  'E': 1709638563141,
  'T': 1709638563141,
  's': 'ETHUSDT',
  't': 3701863556,
  'p': '3733.67',
  'q': '0.406',
  'X': 'MARKET',
  'm': False},
 'timestamp': 1709638563141,
 'datetime': '2024-03-05T11:36:03.141Z',
 'symbol': 'ETH/USDT:USDT',
 'id': '3701863556',
 'order': None,
 'type': None,
 'side': 'buy',
 'takerOrMaker': None,
 'price': 3733.67,
 'amount': 0.406,
 'cost': 1515.87002,
 'fee': None,
 'fees': []}

In [283]:
from tqdm import tqdm

my_trades = []
for trade in tqdm(trades_ccxt_ws):
    if (
        trade["timestamp"] >= 1709640960000
        and trade["timestamp"] <= 1709640960000 + 60000
    ):
        my_trades.append(trade)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 888170974/888170974 [06:06<00:00, 2420443.65it/s]


In [285]:
len(my_trades)

17945332

In [286]:
df = pd.DataFrame(my_trades)

In [287]:
df = df[
    ["timestamp", "datetime", "symbol", "id", "side", "price", "amount", "cost"]
]
df = df.drop_duplicates()

In [288]:
len(df)

4648

In [290]:
trades_ws_ccxt = df.to_dict(orient="records")

In [291]:
trades_ws_ccxt

[{'timestamp': 1709640960021,
  'datetime': '2024-03-05T12:16:00.021Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3702038934',
  'side': 'sell',
  'price': 3767.31,
  'amount': 0.141,
  'cost': 531.19071},
 {'timestamp': 1709640960054,
  'datetime': '2024-03-05T12:16:00.054Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3702038935',
  'side': 'sell',
  'price': 3767.31,
  'amount': 0.042,
  'cost': 158.22702},
 {'timestamp': 1709640960054,
  'datetime': '2024-03-05T12:16:00.054Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3702038936',
  'side': 'sell',
  'price': 3767.31,
  'amount': 0.036,
  'cost': 135.62316},
 {'timestamp': 1709640960054,
  'datetime': '2024-03-05T12:16:00.054Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3702038937',
  'side': 'sell',
  'price': 3767.31,
  'amount': 0.026,
  'cost': 97.95006},
 {'timestamp': 1709640960054,
  'datetime': '2024-03-05T12:16:00.054Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3702038938',
  'side': 'sell',
  'price': 3767.31,
  'amount': 0.037,
  'cost': 139

In [292]:
df_full = pd.read_csv("../../../../../../trades_data_ws_binance.csv")

In [295]:
df_full

Unnamed: 0.1,Unnamed: 0,e,E,T,s,t,p,q,X,m
0,0,trade,1709638481972,1709638481971,ETHUSDT,3701858304,3735.58,1.550,MARKET,False
1,1,trade,1709638481972,1709638481972,ETHUSDT,3701858305,3735.57,0.010,MARKET,True
2,2,trade,1709638481974,1709638481973,ETHUSDT,3701858306,3735.57,0.049,MARKET,True
3,3,trade,1709638481974,1709638481973,ETHUSDT,3701858307,3735.57,0.007,MARKET,True
4,4,trade,1709638481974,1709638481973,ETHUSDT,3701858308,3735.57,0.007,MARKET,True
...,...,...,...,...,...,...,...,...,...,...
286701,286701,trade,1709642486413,1709642486412,ETHUSDT,3702145005,3761.14,0.946,MARKET,True
286702,286702,trade,1709642486490,1709642486490,ETHUSDT,3702145006,3761.15,1.000,MARKET,False
286703,286703,trade,1709642486646,1709642486646,ETHUSDT,3702145007,3761.15,0.007,MARKET,False
286704,286704,trade,1709642486646,1709642486646,ETHUSDT,3702145008,3761.15,1.473,MARKET,False


In [296]:
df = df_full[
    ((df_full["T"] >= 1709640960000) & (df_full["T"] <= (1709640960000 + 60000)))
]

In [297]:
len(df)

4648

In [299]:
df["timestamp"] = df["T"]
df["id"] = df["t"]
df["price"] = df["p"]
df["amount"] = df["q"]

In [300]:
trades_ws_binance = df.to_dict(orient="records")

In [301]:
ccxt_exchange.build_ohlcvc(trades_ws_binance)

[[1709640960000, 3767.31, 7042.67, 3762.46, 3764.32, 2475.4780000000233, 4648]]

In [302]:
ccxt_exchange.build_ohlcvc(trades_ws_ccxt)

[[1709640960000, 3767.31, 7042.67, 3762.46, 3764.32, 2475.4780000000233, 4648]]

In [303]:
for trade in trades_ws_ccxt:
    if trade["price"] == 7042.67:
        print(trade)

{'timestamp': 1709640988575, 'datetime': '2024-03-05T12:16:28.575Z', 'symbol': 'ETH/USDT:USDT', 'id': '3702041010', 'side': 'sell', 'price': 7042.67, 'amount': 0.002, 'cost': 14.08534}


In [240]:
536.6359999999945 - 536.5779999999986

0.0579999999959

In [262]:
def found_trade_id(id):
    for trade in trades_ws_binance:
        if trade["l"] >= id and trade["f"] <= id:
            return True
    return False

In [263]:
for id in range(3691724382, 3691725689 + 1):
    if not found_trade_id(id):
        print(id)

3691725522


In [273]:
trades_ws_ccxt

[{'timestamp': 1709480580478,
  'datetime': '2024-03-03T15:43:00.478Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3691724382',
  'side': 'sell',
  'price': 3431.98,
  'amount': 3.455,
  'cost': 11857.4909},
 {'timestamp': 1709480580702,
  'datetime': '2024-03-03T15:43:00.702Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3691724383',
  'side': 'sell',
  'price': 3431.98,
  'amount': 0.016,
  'cost': 54.91168},
 {'timestamp': 1709480580712,
  'datetime': '2024-03-03T15:43:00.712Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3691724384',
  'side': 'sell',
  'price': 3431.98,
  'amount': 3.496,
  'cost': 11998.20208},
 {'timestamp': 1709480580712,
  'datetime': '2024-03-03T15:43:00.712Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3691724385',
  'side': 'sell',
  'price': 3431.98,
  'amount': 5.0,
  'cost': 17159.9},
 {'timestamp': 1709480580712,
  'datetime': '2024-03-03T15:43:00.712Z',
  'symbol': 'ETH/USDT:USDT',
  'id': '3691724386',
  'side': 'sell',
  'price': 3431.98,
  'amount': 0.732,
  'cost': 2512

In [328]:
trades_error = {"binance": trades_ws_binance, "ccxt": trades_ws_ccxt}
pickle.dump(trades_error, open("trades_error_1709640960000.pkl", "wb"))

In [304]:
df_agg_trade = pd.read_csv("trades_data_ws_binance_agg_trade.csv")
df_trade = pd.read_csv("trades_data_ws_binance_trade.csv")

In [305]:
df_agg_trade.head()

Unnamed: 0.1,Unnamed: 0,e,E,a,s,p,q,f,l,T,m
0,0,aggTrade,1709699443548,1527349983,ETHUSDT,3555.07,1.406,3709345386,3709345396,1709699443427,True
1,1,aggTrade,1709699443681,1527349984,ETHUSDT,3555.07,1.406,3709345397,3709345401,1709699443546,True
2,2,aggTrade,1709699443827,1527349985,ETHUSDT,3555.07,0.703,3709345402,3709345402,1709699443679,True
3,3,aggTrade,1709699443937,1527349986,ETHUSDT,3555.07,1.406,3709345403,3709345404,1709699443825,True
4,4,aggTrade,1709699444054,1527349987,ETHUSDT,3555.07,1.406,3709345405,3709345410,1709699443936,True


In [306]:
df_trade.head()

Unnamed: 0.1,Unnamed: 0,e,E,T,s,t,p,q,X,m
0,0,trade,1709699594705,1709699594705,ETHUSDT,3709350188,3560.63,0.007,MARKET,True
1,1,trade,1709699594705,1709699594705,ETHUSDT,3709350189,3560.63,0.193,MARKET,True
2,2,trade,1709699594717,1709699594716,ETHUSDT,3709350190,3560.63,0.124,MARKET,True
3,3,trade,1709699594717,1709699594716,ETHUSDT,3709350191,3560.63,0.037,MARKET,True
4,4,trade,1709699594717,1709699594716,ETHUSDT,3709350192,3560.63,2.5,MARKET,True


In [308]:
df_agg_trade["timestamp"] = df_agg_trade["T"]
df_agg_trade["id"] = df_agg_trade["l"]
df_agg_trade["price"] = df_agg_trade["p"]
df_agg_trade["amount"] = df_agg_trade["q"]
agg_trades_binance = df_agg_trade.to_dict(orient="records")

In [316]:
df_trade["timestamp"] = df_trade["T"]
df_trade["id"] = df_trade["t"]
df_trade["price"] = df_trade["p"]
df_trade["amount"] = df_trade["q"]
trades_binance = df_trade.to_dict(orient="records")

In [321]:
ohlcv_agg_trade = pd.DataFrame(
    ccxt_exchange.build_ohlcvc(agg_trades_binance),
    columns=["timestamp", "open", "high", "low", "close", "volume", "count"],
)
ohlcv_trade = pd.DataFrame(
    ccxt_exchange.build_ohlcvc(trades_binance),
    columns=["timestamp", "open", "high", "low", "close", "volume", "count"],
)

In [322]:
ohlcv_agg_trade

Unnamed: 0,timestamp,open,high,low,close,volume,count
0,1709699400000,3555.07,3555.07,3552.84,3552.84,299.446,193
1,1709699460000,3552.84,3556.80,3552.47,3555.88,1384.329,846
2,1709699520000,3555.89,3557.94,3555.00,3557.93,657.172,568
3,1709699580000,3557.75,3560.64,3556.36,3557.51,930.879,786
4,1709699640000,3557.51,3562.38,3557.23,3562.38,1235.291,932
...,...,...,...,...,...,...,...
63,1709703180000,3748.94,3753.79,3746.00,3746.00,5251.909,2764
64,1709703240000,3746.00,3746.87,3732.65,3734.38,12570.065,4486
65,1709703300000,3734.37,3741.98,3731.77,3739.09,7006.102,2894
66,1709703360000,3739.08,3754.99,3738.30,3753.18,7577.095,3272


In [323]:
ohlcv_trade

Unnamed: 0,timestamp,open,high,low,close,volume,count
0,1709699580000,3560.63,3560.63,3556.36,3557.51,744.456,1225
1,1709699640000,3557.51,3562.38,3557.23,3562.38,1235.291,2287
2,1709699700000,3562.37,3564.89,3561.83,3564.73,628.905,1341
3,1709699760000,3564.73,3570.00,3564.49,3570.00,8023.223,5501
4,1709699820000,3569.99,3572.08,3564.49,3564.52,3378.039,3868
...,...,...,...,...,...,...,...
62,1709703300000,3734.37,3741.98,3731.77,3739.09,7005.799,7410
63,1709703360000,3739.09,3754.99,3738.30,3753.18,7577.398,7915
64,1709703420000,3753.19,3758.49,3747.58,3756.73,9320.037,9931
65,1709703480000,3756.73,3756.73,3745.38,3746.85,5992.732,9286


In [324]:
get_error(ohlcv_agg_trade, ohlcv_trade)

Avg Open error  : 0.0
Avg Close error : 0.0
Avg High error  : 0.0
Avg Low error   : 0.0
Avg Volume error: 1.36
        timestamp   open_x   high_x    low_x  close_x  volume_x  count_x  \
28  1709701260000  3668.73  3668.78  3654.68  3656.69  8676.335     3655   
57  1709703000000  3747.81  3758.97  3746.70  3754.19  9832.262     4909   
64  1709703420000  3753.19  3754.99  3747.58  3754.21  3696.520     1746   

     open_y   high_y    low_y  close_y  volume_y  count_y  open_error  \
28  3668.84  3668.84  3654.68  3656.69  8684.978     8959    0.002998   
57  3747.81  3762.74  3746.70  3754.19  9832.304    11454    0.000000   
64  3753.19  3758.49  3747.58  3756.73  9320.037     9931    0.000000   

    high_error  low_error  close_error  volume_error  
28    0.001635        0.0      0.00000      0.099517  
57    0.100193        0.0      0.00000      0.000427  
64    0.093123        0.0      0.06708     60.337926  
        timestamp     open     high      low    close    volume  count


(        timestamp   open_x   high_x    low_x  close_x  volume_x  count_x  \
 28  1709701260000  3668.73  3668.78  3654.68  3656.69  8676.335     3655   
 57  1709703000000  3747.81  3758.97  3746.70  3754.19  9832.262     4909   
 64  1709703420000  3753.19  3754.99  3747.58  3754.21  3696.520     1746   
 
      open_y   high_y    low_y  close_y  volume_y  count_y  open_error  \
 28  3668.84  3668.84  3654.68  3656.69  8684.978     8959    0.002998   
 57  3747.81  3762.74  3746.70  3754.19  9832.304    11454    0.000000   
 64  3753.19  3758.49  3747.58  3756.73  9320.037     9931    0.000000   
 
     high_error  low_error  close_error  volume_error  
 28    0.001635        0.0      0.00000      0.099517  
 57    0.100193        0.0      0.00000      0.000427  
 64    0.093123        0.0      0.06708     60.337926  ,
         timestamp     open     high      low    close    volume  count
 28  1709701260000  3668.84  3668.84  3654.68  3656.69  8684.978   8959
 57  1709703000000  374

In [326]:
df_trade.tail()

Unnamed: 0.1,Unnamed: 0,e,E,T,s,t,p,q,X,m,timestamp,id,price,amount
601274,601274,trade,1709703594615,1709703594614,ETHUSDT,3709951462,3738.24,3.599,MARKET,False,1709703594614,3709951462,3738.24,3.599
601275,601275,trade,1709703594633,1709703594633,ETHUSDT,3709951463,3738.24,0.034,MARKET,False,1709703594633,3709951463,3738.24,0.034
601276,601276,trade,1709703594684,1709703594684,ETHUSDT,3709951464,3738.27,0.17,MARKET,True,1709703594684,3709951464,3738.27,0.17
601277,601277,trade,1709703594695,1709703594695,ETHUSDT,3709951465,3738.27,0.18,MARKET,True,1709703594695,3709951465,3738.27,0.18
601278,601278,trade,1709703594712,1709703594712,ETHUSDT,3709951466,3738.27,0.008,MARKET,True,1709703594712,3709951466,3738.27,0.008


In [327]:
df_agg_trade.tail()

Unnamed: 0.1,Unnamed: 0,e,E,a,s,p,q,f,l,T,m,timestamp,id,price,amount
230474,230474,aggTrade,1709703443546,1527580457,ETHUSDT,3754.35,0.137,3709925788,3709925788,1709703443393,False,1709703443393,3709925788,3754.35,0.137
230475,230475,aggTrade,1709703443563,1527580458,ETHUSDT,3754.34,5.337,3709925789,3709925796,1709703443560,True,1709703443560,3709925796,3754.34,5.337
230476,230476,aggTrade,1709703443563,1527580459,ETHUSDT,3754.33,0.006,3709925797,3709925797,1709703443560,True,1709703443560,3709925797,3754.33,0.006
230477,230477,aggTrade,1709703443563,1527580460,ETHUSDT,3754.3,1.658,3709925798,3709925799,1709703443560,True,1709703443560,3709925799,3754.3,1.658
230478,230478,aggTrade,1709703443563,1527580461,ETHUSDT,3754.21,0.875,3709925800,3709925800,1709703443561,True,1709703443561,3709925800,3754.21,0.875
