In [3]:
import os
from pprint import pprint
import zipfile
import pandas as pd

# Project root. Set the BINANCE_AUTO_TRADER_DIR environment variable to run the
# notebook from another checkout; when it is unset the original local path is
# used, so existing behaviour is unchanged.
base_dir = os.environ.get(
    "BINANCE_AUTO_TRADER_DIR", "/Users/meng/MyFolder/binance-auto-trader"
)
# Everything else is derived from <base_dir>/workspace.
workspace_dir = os.path.join(base_dir, "workspace")
# Storage root: exported as STORE_DIRECTORY and used as the pickle output root.
data_store_dir = os.path.join(workspace_dir, "data-store")
# Directory of the binance-public-data download scripts.
data_loader_dir = os.path.join(workspace_dir, "binance-public-data")

In [4]:
# Sanity check: show the four resolved directories, one per line.
print(base_dir, workspace_dir, data_store_dir, data_loader_dir, sep="\n")

/Users/meng/MyFolder/binance-auto-trader
/Users/meng/MyFolder/binance-auto-trader/workspace
/Users/meng/MyFolder/binance-auto-trader/workspace/data-store
/Users/meng/MyFolder/binance-auto-trader/workspace/binance-public-data


In [5]:
# Market segment passed to every download command via -t.
marketType = "spot"

# Export the storage root to the process environment so that the `!python`
# subprocesses launched below inherit it (the download logs show files being
# written under this directory).
os.environ["STORE_DIRECTORY"] = data_store_dir

# 1. Data Load

## spot, klines

In [5]:
# --- kline download parameters ---------------------------------------------
# NOTE(review): the transform section reads 1s klines for
# 2023-07-29..2023-09-28, which is not what this 1m / two-day window
# downloads. Confirm which settings produced that data.

# Instrument and candle size.
symbols = "BTCUSDT"
interval = "1m"

# Date window (YYYY-MM-DD); the download log shows both end points fetched.
startDate = "2023-09-27"
endDate = "2023-09-28"

# Archive granularity switches. Only skipMonthly is forwarded to the download
# command in this notebook; skipDaily is defined but not passed on.
skipMonthly = 1
skipDaily = 0

In [17]:
!python binance-public-data/python/download-kline.py \
    -t {marketType} \
    -s {symbols} \
    -i {interval} \
    -skip-monthly {skipMonthly} \
    -startDate {startDate} \
    -endDate {endDate}

Found 1 symbols
[1/1] - start download daily BTCUSDT klines 

File Download: /Users/meng/MyFolder/binance-auto-trader/workspace/data-store/data/spot/daily/klines/BTCUSDT/1m/2023-09-27_2023-09-28/BTCUSDT-1m-2023-09-27.zip
[##################################################]
File Download: /Users/meng/MyFolder/binance-auto-trader/workspace/data-store/data/spot/daily/klines/BTCUSDT/1m/2023-09-27_2023-09-28/BTCUSDT-1m-2023-09-28.zip
[##################################################]

## spot, aggTrades

In [6]:
# --- aggTrades download parameters -----------------------------------------
# These names are shared with the other download sections, so run this cell
# immediately before the aggTrades download command.

# Instrument.
symbols = "BTCUSDT"

# Date window (YYYY-MM-DD).
startDate = "2023-07-29"
endDate = "2023-09-28"

# Archive granularity switches. Only skipMonthly is forwarded to the download
# command in this notebook; skipDaily is defined but not passed on.
skipMonthly = 1
skipDaily = 0

In [7]:
!python binance-public-data/python/download-aggTrade.py \
    -t {marketType} \
    -s {symbols} \
    -skip-monthly {skipMonthly} \
    -startDate {startDate} \
    -endDate {endDate}

fetching 1 symbols from exchange
Found 1 symbols
[1/1] - start download daily BTCUSDT aggTrades 

File Download: /Users/meng/MyFolder/binance-auto-trader/workspace/data-store/data/spot/daily/aggTrades/BTCUSDT/2023-07-29_2023-09-28/BTCUSDT-aggTrades-2023-07-29.zip
[##################################################]
File Download: /Users/meng/MyFolder/binance-auto-trader/workspace/data-store/data/spot/daily/aggTrades/BTCUSDT/2023-07-29_2023-09-28/BTCUSDT-aggTrades-2023-07-30.zip
[##################################################]
File Download: /Users/meng/MyFolder/binance-auto-trader/workspace/data-store/data/spot/daily/aggTrades/BTCUSDT/2023-07-29_2023-09-28/BTCUSDT-aggTrades-2023-07-31.zip
[##################################################]
File Download: /Users/meng/MyFolder/binance-auto-trader/workspace/data-store/data/spot/daily/aggTrades/BTCUSDT/2023-07-29_2023-09-28/BTCUSDT-aggTrades-2023-08-01.zip
[##################################################]
File Download: /Users/meng

## spot, trades

In [8]:
# --- trades download parameters --------------------------------------------
# These names are shared with the other download sections, so run this cell
# immediately before the trades download command.

# Instrument.
symbols = "BTCUSDT"

# Date window (YYYY-MM-DD).
startDate = "2023-07-29"
endDate = "2023-09-28"

# Archive granularity switches. Only skipMonthly is forwarded to the download
# command in this notebook; skipDaily is defined but not passed on.
skipMonthly = 1
skipDaily = 0

In [9]:
!python binance-public-data/python/download-trade.py \
    -t {marketType} \
    -s {symbols} \
    -skip-monthly {skipMonthly} \
    -startDate {startDate} \
    -endDate {endDate}

fetching 1 symbols from exchange
Found 1 symbols
[1/1] - start download daily BTCUSDT trades 

File Download: /Users/meng/MyFolder/binance-auto-trader/workspace/data-store/data/spot/daily/trades/BTCUSDT/2023-07-29_2023-09-28/BTCUSDT-trades-2023-07-29.zip
[##################################################]
File Download: /Users/meng/MyFolder/binance-auto-trader/workspace/data-store/data/spot/daily/trades/BTCUSDT/2023-07-29_2023-09-28/BTCUSDT-trades-2023-07-30.zip
[##################################################]
File Download: /Users/meng/MyFolder/binance-auto-trader/workspace/data-store/data/spot/daily/trades/BTCUSDT/2023-07-29_2023-09-28/BTCUSDT-trades-2023-07-31.zip
[##################################################]
File Download: /Users/meng/MyFolder/binance-auto-trader/workspace/data-store/data/spot/daily/trades/BTCUSDT/2023-07-29_2023-09-28/BTCUSDT-trades-2023-08-01.zip
[##################################################]
File Download: /Users/meng/MyFolder/binance-auto-trad

# 2. Data Transform

In [6]:
# Directories holding the downloaded daily zip archives, one per data type.
# All three share the spot/daily prefix and differ only in the trailing part.
# NOTE(review): the klines entry points at 1s candles for the two-month
# window, while the kline parameter cell above is set to 1m for two days.
# Confirm the 1s archives exist before running the transform.
klines_path, aggtrades_path, trades_path = (
    os.path.join(base_dir, "workspace/data-store/data/spot/daily/" + suffix)
    for suffix in (
        "klines/BTCUSDT/1s/2023-07-29_2023-09-28",
        "aggTrades/BTCUSDT/2023-07-29_2023-09-28",
        "trades/BTCUSDT/2023-07-29_2023-09-28",
    )
)

In [32]:
def read_csv_from_zip(base_path, zip_file_path):
    """Load the CSV stored inside a zip archive as a DataFrame.

    The archive must contain a member named after the archive itself with the
    extension replaced by ``.csv`` (``foo.zip`` -> ``foo.csv``).

    Args:
        base_path: Directory that contains the archive.
        zip_file_path: File name of the archive, relative to ``base_path``.

    Returns:
        pandas.DataFrame read with ``header=None``, i.e. the first CSV line is
        data and the columns are labelled 0..k-1.

    Raises:
        zipfile.BadZipFile: If the file is not a valid zip archive.
        KeyError: If the expected ``.csv`` member is not in the archive.
    """
    # splitext drops only the final extension, so archive names that contain
    # additional dots still map to the correct member name.
    csv_name = os.path.splitext(zip_file_path)[0] + ".csv"
    # Context managers close both the archive and the member handle; the
    # previous version leaked one open file per archive.
    with zipfile.ZipFile(os.path.join(base_path, zip_file_path)) as archive:
        with archive.open(csv_name) as csv_file:
            return pd.read_csv(csv_file, header=None)

def df_transform(base_path):
    """Stack the CSVs from every zip archive in ``base_path`` into one DataFrame.

    Directory entries are visited in sorted name order. Entries that are not
    zip archives (for example macOS ``.DS_Store`` files) are skipped silently.
    Archives that fail to load are skipped too, but a message naming the file
    and the error is printed so missing days do not go unnoticed. One progress
    line is printed per loaded archive.

    Args:
        base_path: Directory holding the zip archives.

    Returns:
        pandas.DataFrame with the rows of all loaded archives in visiting
        order and a fresh 0..n-1 index. An empty DataFrame is returned when
        nothing could be loaded.

    Raises:
        OSError: If ``base_path`` cannot be listed (e.g. it does not exist).
    """
    frames = []
    total_rows = 0
    seen_columns = set()  # union of column labels, to report the running shape
    for zip_file_path in sorted(os.listdir(base_path)):
        # Content-based check: ignores stray files and sub-directories without
        # relying on a blanket exception handler.
        if not zipfile.is_zipfile(os.path.join(base_path, zip_file_path)):
            continue
        try:
            df = read_csv_from_zip(base_path, zip_file_path)
        except (zipfile.BadZipFile, KeyError, OSError, ValueError) as exc:
            # ValueError also covers pandas' EmptyDataError / ParserError.
            print("skipped {}: {!r}".format(zip_file_path, exc))
            continue
        frames.append(df)
        total_rows += len(df)
        seen_columns.update(df.columns)
        print("{}.csv, shape : {} res_df shape: {}".format(
            os.path.splitext(zip_file_path)[0],
            df.shape,
            (total_rows, len(seen_columns)),
        ))
    if not frames:
        return pd.DataFrame()
    # A single concat at the end avoids re-copying the accumulated frame on
    # every iteration; ignore_index gives the same result as reset_index(drop=True).
    return pd.concat(frames, axis=0, ignore_index=True)

In [33]:
# Merge every daily archive under klines_path into a single frame.
kl_df = df_transform(klines_path)

BTCUSDT-1s-2023-07-29.csv, shape : (86400, 12) res_df shape: (86400, 12)
BTCUSDT-1s-2023-07-30.csv, shape : (86400, 12) res_df shape: (172800, 12)
BTCUSDT-1s-2023-07-31.csv, shape : (86400, 12) res_df shape: (259200, 12)
BTCUSDT-1s-2023-08-01.csv, shape : (86400, 12) res_df shape: (345600, 12)
BTCUSDT-1s-2023-08-02.csv, shape : (86400, 12) res_df shape: (432000, 12)
BTCUSDT-1s-2023-08-03.csv, shape : (86400, 12) res_df shape: (518400, 12)
BTCUSDT-1s-2023-08-04.csv, shape : (86400, 12) res_df shape: (604800, 12)
BTCUSDT-1s-2023-08-05.csv, shape : (86400, 12) res_df shape: (691200, 12)
BTCUSDT-1s-2023-08-06.csv, shape : (86400, 12) res_df shape: (777600, 12)
BTCUSDT-1s-2023-08-07.csv, shape : (86400, 12) res_df shape: (864000, 12)
BTCUSDT-1s-2023-08-08.csv, shape : (86400, 12) res_df shape: (950400, 12)
BTCUSDT-1s-2023-08-09.csv, shape : (86400, 12) res_df shape: (1036800, 12)
BTCUSDT-1s-2023-08-10.csv, shape : (86400, 12) res_df shape: (1123200, 12)
BTCUSDT-1s-2023-08-11.csv, shape : (8

In [36]:
# Persist the merged klines. The output folder is created first because
# DataFrame.to_pickle does not create missing parent directories.
pickle_dir = os.path.join(data_store_dir, "BTCUSDT-230729_230928")
os.makedirs(pickle_dir, exist_ok=True)
kl_df.to_pickle(os.path.join(pickle_dir, "klines-1s.pkl"))

In [37]:
# Drop the name binding for the klines frame now that it has been pickled;
# presumably done so the memory can be reclaimed before the next dataset loads.
del kl_df

In [35]:
# Merge every daily archive under aggtrades_path into a single frame.
at_df = df_transform(aggtrades_path)

BTCUSDT-aggTrades-2023-07-29.csv, shape : (270098, 8) res_df shape: (270098, 8)
BTCUSDT-aggTrades-2023-07-30.csv, shape : (334570, 8) res_df shape: (604668, 8)
BTCUSDT-aggTrades-2023-07-31.csv, shape : (400283, 8) res_df shape: (1004951, 8)
BTCUSDT-aggTrades-2023-08-01.csv, shape : (613670, 8) res_df shape: (1618621, 8)
BTCUSDT-aggTrades-2023-08-02.csv, shape : (627482, 8) res_df shape: (2246103, 8)
BTCUSDT-aggTrades-2023-08-03.csv, shape : (435358, 8) res_df shape: (2681461, 8)
BTCUSDT-aggTrades-2023-08-04.csv, shape : (388889, 8) res_df shape: (3070350, 8)
BTCUSDT-aggTrades-2023-08-05.csv, shape : (262831, 8) res_df shape: (3333181, 8)
BTCUSDT-aggTrades-2023-08-06.csv, shape : (269225, 8) res_df shape: (3602406, 8)
BTCUSDT-aggTrades-2023-08-07.csv, shape : (495710, 8) res_df shape: (4098116, 8)
BTCUSDT-aggTrades-2023-08-08.csv, shape : (621988, 8) res_df shape: (4720104, 8)
BTCUSDT-aggTrades-2023-08-09.csv, shape : (530044, 8) res_df shape: (5250148, 8)
BTCUSDT-aggTrades-2023-08-10.c

In [38]:
# Persist the merged aggTrades. The output folder is created first because
# DataFrame.to_pickle does not create missing parent directories.
pickle_dir = os.path.join(data_store_dir, "BTCUSDT-230729_230928")
os.makedirs(pickle_dir, exist_ok=True)
at_df.to_pickle(os.path.join(pickle_dir, "aggtrades.pkl"))

In [39]:
# Drop the name binding for the aggTrades frame now that it has been pickled;
# presumably done so the memory can be reclaimed before the next dataset loads.
del at_df

In [40]:
# Merge every daily archive under trades_path into a single frame.
tr_df = df_transform(trades_path)

BTCUSDT-trades-2023-07-29.csv, shape : (339750, 7) res_df shape: (339750, 7)
BTCUSDT-trades-2023-07-30.csv, shape : (454846, 7) res_df shape: (794596, 7)
BTCUSDT-trades-2023-07-31.csv, shape : (563115, 7) res_df shape: (1357711, 7)
BTCUSDT-trades-2023-08-01.csv, shape : (900141, 7) res_df shape: (2257852, 7)
BTCUSDT-trades-2023-08-02.csv, shape : (945158, 7) res_df shape: (3203010, 7)
BTCUSDT-trades-2023-08-03.csv, shape : (651970, 7) res_df shape: (3854980, 7)
BTCUSDT-trades-2023-08-04.csv, shape : (544850, 7) res_df shape: (4399830, 7)
BTCUSDT-trades-2023-08-05.csv, shape : (331924, 7) res_df shape: (4731754, 7)
BTCUSDT-trades-2023-08-06.csv, shape : (338654, 7) res_df shape: (5070408, 7)
BTCUSDT-trades-2023-08-07.csv, shape : (671438, 7) res_df shape: (5741846, 7)
BTCUSDT-trades-2023-08-08.csv, shape : (877168, 7) res_df shape: (6619014, 7)
BTCUSDT-trades-2023-08-09.csv, shape : (750252, 7) res_df shape: (7369266, 7)
BTCUSDT-trades-2023-08-10.csv, shape : (513691, 7) res_df shape: (

In [41]:
# Persist the merged trades. The output folder is created first because
# DataFrame.to_pickle does not create missing parent directories.
pickle_dir = os.path.join(data_store_dir, "BTCUSDT-230729_230928")
os.makedirs(pickle_dir, exist_ok=True)
tr_df.to_pickle(os.path.join(pickle_dir, "trades.pkl"))