In [3]:
import pandas as pd

# load data

In [5]:
btcusdt = pd.read_csv('BTCUSDT_1h.csv')

In [15]:
btcusdt

Unnamed: 0_level_0,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,taker_base_vol,taker_quote_vol
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-01-01 00:00:00,42283.58,42554.57,42261.02,42475.23,1271.68108,2024-01-01 00:59:59.999,5.395725e+07,47134,682.57581,2.895742e+07
2024-01-01 01:00:00,42475.23,42775.00,42431.65,42613.56,1196.37856,2024-01-01 01:59:59.999,5.098489e+07,50396,712.32227,3.035565e+07
2024-01-01 02:00:00,42613.57,42638.41,42500.00,42581.10,685.21980,2024-01-01 02:59:59.999,2.916738e+07,29863,288.98864,1.230102e+07
2024-01-01 03:00:00,42581.09,42586.64,42230.08,42330.49,794.80391,2024-01-01 03:59:59.999,3.370905e+07,38620,356.37209,1.511300e+07
2024-01-01 04:00:00,42330.50,42399.99,42209.46,42399.99,715.41760,2024-01-01 04:59:59.999,3.027162e+07,36038,371.12012,1.570362e+07
...,...,...,...,...,...,...,...,...,...,...
2024-05-16 10:00:00,66364.69,66364.69,66100.00,66169.99,747.09629,2024-05-16 10:59:59.999,4.946380e+07,34370,322.75738,2.136819e+07
2024-05-16 11:00:00,66170.00,66585.70,66000.00,66503.80,933.73337,2024-05-16 11:59:59.999,6.185368e+07,60929,512.66956,3.397043e+07
2024-05-16 12:00:00,66503.81,66752.01,65904.76,66011.01,2444.27960,2024-05-16 12:59:59.999,1.619270e+08,119071,1261.73855,8.359828e+07
2024-05-16 13:00:00,66011.01,66367.92,65759.99,66328.05,1698.98043,2024-05-16 13:59:59.999,1.122109e+08,68994,882.84958,5.830984e+07


# drop columns

In [6]:
btcusdt.drop(columns=['ignore', 'order_book'], inplace=True)

# Convert 'open_time' and 'close_time' columns to datetime format

In [7]:
btcusdt['open_time'] = pd.to_datetime(btcusdt['open_time'], unit='ms')
btcusdt['close_time'] = pd.to_datetime(btcusdt['close_time'], unit='ms')

# Time Aggregation: Resample data to daily intervals

In [9]:
btcusdt.set_index('open_time', inplace=True)
btcusdt_resampled = btcusdt.resample('D').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum',
    'quote_asset_volume': 'sum',
    'num_trades': 'sum',
    'taker_base_vol': 'sum',
    'taker_quote_vol': 'sum',
}).reset_index()

# Feature Engineering: Calculate additional features. eg. price_diff & price_change

In [13]:
btcusdt_resampled['price_diff'] = btcusdt_resampled['high'] - btcusdt_resampled['low']
btcusdt_resampled['price_change'] = btcusdt_resampled['close'] - btcusdt_resampled['open']

In [16]:
btcusdt_resampled

Unnamed: 0,open_time,open,high,low,close,volume,quote_asset_volume,num_trades,taker_base_vol,taker_quote_vol,price_diff,price_change
0,2024-01-01,42283.58,44184.10,42180.77,44179.55,27174.29903,1.169996e+09,1114623,14331.73180,6.173521e+08,2003.33,1895.97
1,2024-01-02,44179.55,45879.63,44148.34,44946.91,65146.40661,2.944332e+09,2247532,33817.14447,1.527964e+09,1731.29,767.36
2,2024-01-03,44946.91,45500.00,40750.00,42845.23,81194.55173,3.507105e+09,2658041,39103.99162,1.687665e+09,4750.00,-2101.68
3,2024-01-04,42845.23,44729.58,42613.77,44151.10,48038.06334,2.095095e+09,1819944,23605.90059,1.030075e+09,2115.81,1305.87
4,2024-01-05,44151.10,44357.46,42450.00,44145.11,48075.25327,2.100954e+09,2064845,24015.06426,1.049655e+09,1907.46,-5.99
...,...,...,...,...,...,...,...,...,...,...,...,...
132,2024-05-12,60825.99,61888.00,60610.00,61483.99,12753.13236,7.810416e+08,727113,6416.91108,3.931324e+08,1278.00,658.00
133,2024-05-13,61484.00,63450.00,60749.21,62940.08,32733.41839,2.041897e+09,1371433,16717.90863,1.043061e+09,2700.79,1456.08
134,2024-05-14,62940.09,63118.36,61142.77,61577.49,29088.72041,1.800172e+09,1127939,13815.81443,8.550881e+08,1975.59,-1362.60
135,2024-05-15,61577.49,66444.16,61319.47,66206.50,43559.74719,2.794260e+09,1729454,21797.84094,1.398782e+09,5124.69,4629.01


# Save btcusdt_resampled to a CSV file


In [17]:
btcusdt_resampled.to_csv('btcusdt_resampled_data.csv', index=False)