In [1]:
import pandas as pd
import numpy as np
import datetime as dt

from utils.load_data import get_files

# Load Data

In [2]:
# Venue name and the date window, given as YYYYMMDD strings.
exchange = "binance"
start_date, end_date = "20230101", "20241231"

# Parse both boundaries with the same format; the results are naive
# datetimes at 00:00 of the respective day.
start_dt, end_dt = (
    dt.datetime.strptime(day, "%Y%m%d") for day in (start_date, end_date)
)

In [3]:
# Fetch the "level1", "book" and "trade" datasets (in that order) with the
# same window, exchange, "futures" and "BTCUSDT" arguments.
level1_data, book_data, trade_data = (
    get_files(start_dt, end_dt, dataset, exchange, "futures", "BTCUSDT")
    for dataset in ("level1", "book", "trade")
)

# Cell output: the shape of each loaded frame.
level1_data.shape, book_data.shape, trade_data.shape

((1052640, 28), (1052640, 63), (1052640, 23))

In [4]:
# Convert the epoch-millisecond timestamp columns to datetimes and index each
# frame by ts_end.
def _index_by_ts_end(frame, ts_cols=('ts_end',)):
    """Convert ``ts_cols`` from epoch milliseconds and index ``frame`` by ``ts_end``.

    The frame is modified in place (same object, no copy), and nothing is
    returned.

    ``set_index`` removes the ``ts_end`` column, so running the conversion a
    second time would raise ``KeyError: 'ts_end'``. To keep the cell safe to
    re-run, a frame whose index is already named ``ts_end`` is left untouched.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding a ``ts_end`` column of millisecond epoch timestamps.
    ts_cols : iterable of str
        Columns to convert with ``pd.to_datetime(..., unit='ms')``. Must
        include ``'ts_end'`` for the index to end up as datetimes.

    Raises
    ------
    KeyError
        If the frame is not yet indexed by ``ts_end`` and one of ``ts_cols``
        is missing.
    """
    if frame.index.name == 'ts_end':
        # Already processed by an earlier execution of this cell.
        return

    ts_cols = list(ts_cols)
    frame[ts_cols] = frame[ts_cols].apply(pd.to_datetime, unit='ms')
    frame.set_index('ts_end', inplace=True)


_index_by_ts_end(level1_data)
# The book frame carries a second timestamp column that needs the same conversion.
_index_by_ts_end(book_data, ts_cols=('ts_end', 'ts_book'))
_index_by_ts_end(trade_data)

# Features

#### Level1 Features

In [18]:
# Seed the Level1 feature table with columns taken as-is from level1_data.
# .copy() gives an independent frame, so columns added to level1_features
# later do not end up in level1_data.
level1_features = level1_data.loc[:, [
    # update counts
    'tick_count', 'l3_updates',
    # ask / bid up and down returns
    'ask_up_ret', 'ask_down_ret', 'bid_up_ret', 'bid_down_ret',
    # spread statistics
    'median_spread', 'mean_spread', 'min_spread', 'max_spread',
    # mean bid / ask sizes
    'mean_bid_size', 'mean_ask_size',
    # volatility
    'tick_volatility',
]].copy()

In [None]:
def feature_return(level1_data, level1_features, taus, price_col='close_mid'):
    """Append lagged log-return features to ``level1_features``.

    For every ``tau`` in ``taus`` three columns are written (an existing
    column with the same name is overwritten):

    * ``log_ret_{tau}min`` -- ``log(price[t] / price[t - tau])``
    * ``abs_ret_{tau}min`` -- absolute value of that log return
    * ``sq_ret_{tau}min``  -- square of that log return

    The lag is a *row* offset (``Series.shift(tau)``), not a time offset, so
    the ``min`` suffix is only accurate when consecutive rows are one minute
    apart. For a positive ``tau`` the first ``tau`` rows have no lagged price
    and come out as NaN.

    Parameters
    ----------
    level1_data : pandas.DataFrame
        Frame that contains the price column.
    level1_features : pandas.DataFrame
        Frame that receives the new columns. It is modified in place; values
        are aligned to its index on assignment.
    taus : iterable of int
        Row lags to compute returns over.
    price_col : str, default 'close_mid'
        Name of the column in ``level1_data`` to compute returns from.

    Returns
    -------
    pandas.DataFrame
        The same ``level1_features`` object that was passed in.
    """
    # The price series does not depend on tau, so look it up once.
    price = level1_data[price_col]

    for tau in taus:
        log_ret = np.log(price / price.shift(tau))

        level1_features[f'log_ret_{tau}min'] = log_ret
        level1_features[f'abs_ret_{tau}min'] = log_ret.abs()
        level1_features[f'sq_ret_{tau}min'] = log_ret ** 2

    return level1_features

In [None]:
# Add log / absolute / squared return columns for each lag in taus.
level1_features = feature_return(
    level1_data,
    level1_features,
    taus=[1, 5, 10, 15, 30, 60],
)