# Data Processing Exercise
- Load and pre-process data from https://reach.stratosphere.capital/data/usdm/
- Implement at least one data-derived feature relevant to HFT decision making

In [1]:
import datetime
from python.scripts.download_data import to_filename, download_file
from urllib.parse import urljoin

# Local directory the capture files are written to (relative to the notebook).
EXPORT_PATH = '../data/usdm/'
# Remote directory hosting the capture files, and the instrument to fetch.
BASE_URL = 'https://reach.stratosphere.capital/data/usdm/'
SYMBOL = 'ethusdt'
dates = [datetime.date(2024, 7, 30), datetime.date(2024, 7, 31), datetime.date(2024, 8, 1)]

# Fetch one file per date. The file name is derived inside the loop only; the
# former pre-loop assignment was overwritten before it was ever used.
# NOTE: after the loop, `file_name`, `url` and `dest` refer to the LAST date.
for date in dates:
    file_name = to_filename(SYMBOL, date)
    url = urljoin(BASE_URL, file_name)
    dest = f'{EXPORT_PATH}{file_name}'
    print('Downloading...', url, 'to', dest)
    # NOTE(review): the recorded output suggests download_file skips files that
    # already exist locally - confirm in python.scripts.download_data.
    download_file(url, dest)
print('Download Complete')

Downloading... https://reach.stratosphere.capital/data/usdm/ethusdt_20240730.gz to ../data/usdm/ethusdt_20240730.gz
Existing ../data/usdm/ethusdt_20240730.gz found, skipping download...
Downloading... https://reach.stratosphere.capital/data/usdm/ethusdt_20240731.gz to ../data/usdm/ethusdt_20240731.gz
Downloading... https://reach.stratosphere.capital/data/usdm/ethusdt_20240801.gz to ../data/usdm/ethusdt_20240801.gz
Download Complete


Now that we have the data, we need to normalize it and load it into an hft-backtest format. Each row of the file contains the timestamp at which the message was received, in nanoseconds (local machine time), followed by the message itself in JSON format as received from the Binance API.


In [3]:
import gzip

# Preview the first raw records of one capture file.
# The path is built explicitly from the first download date instead of reusing
# `dest`, which is a leftover of the download loop and ends up pointing at the
# last date; this keeps the preview independent of loop state in another cell.
sample_path = f"{EXPORT_PATH}{to_filename('ethusdt', dates[0])}"
with gzip.open(sample_path, 'rb') as f:  # binary mode: lines come back as bytes
    for _ in range(10):
        # Each record is b'<receive timestamp> <json message>\n'.
        line = f.readline()
        print(line)

b'1722346072346098576 {"stream":"ethusdt@depth@0ms","data":{"e":"depthUpdate","E":1722346072148,"T":1722346072144,"s":"ETHUSDT","U":5048435410381,"u":5048435411907,"pu":5048435410345,"b":[["200.00","10.600"],["334.60","0.000"],["500.00","16.960"],["927.00","0.000"],["1003.80","0.037"],["3011.39","0.080"],["3342.65","3.901"],["3342.68","164.137"],["3342.83","0.088"],["3343.75","2.412"],["3344.91","0.297"],["3345.81","0.000"],["3345.91","1.243"]],"a":[["3346.43","11.954"],["3346.48","3.573"],["3346.52","6.717"],["3348.99","1.397"],["3349.04","2.330"],["3349.07","8.423"],["3349.12","2.750"],["3349.15","1.002"],["3349.18","1.438"],["3349.21","1.136"],["3349.27","3.455"],["3349.37","1.253"],["3349.47","19.270"],["3350.68","6.342"],["3352.19","1.830"],["3354.20","4.718"],["3361.56","12.851"],["3363.00","10.138"]]}}\n'
b'1722346072346295005 {"stream":"ethusdt@depth@0ms","data":{"e":"depthUpdate","E":1722346072201,"T":1722346072197,"s":"ETHUSDT","U":5048435412062,"u":5048435413544,"pu":5048435