In [130]:
import pandas as pd
import numpy as np
import datetime 

In [131]:
df = pd.read_csv("BTCUSDT_historical.csv", index_col="Unnamed: 0", parse_dates=True)

In [132]:
df.head()

Unnamed: 0,datetime,open,high,low,close,volume
2017-08-17 00:00:00,1502942400000,4261.48,4313.62,4261.32,4308.83,47.181009
2017-08-17 01:00:00,1502946000000,4308.83,4328.69,4291.37,4315.32,23.234916
2017-08-17 02:00:00,1502949600000,4330.29,4345.45,4309.37,4324.35,7.229691
2017-08-17 03:00:00,1502953200000,4316.62,4349.99,4287.41,4349.99,4.443249
2017-08-17 04:00:00,1502956800000,4333.32,4377.85,4333.32,4360.69,0.972807


In [133]:
df.tail()

Unnamed: 0,datetime,open,high,low,close,volume
2022-09-30 22:00:00,1664589600000,19423.06,19436.0,19384.41,19413.79,5773.83532
2022-09-30 23:00:00,1664593200000,19413.96,19438.75,19361.05,19416.05,5861.49212
2022-10-01 00:00:00,1664596800000,19416.05,19423.63,19370.7,19407.43,5958.93517
2022-10-01 01:00:00,1664600400000,19407.43,19418.88,19270.59,19317.92,12420.01718
2022-10-01 02:00:00,1664604000000,19316.82,19324.94,19266.26,19292.82,7000.01196


In [134]:
df.isnull().sum()

datetime    0
open        0
high        0
low         0
close       0
volume      0
dtype: int64

Let's prepare the data and divided trading sessions between day and night
Day: 9 to 13
Night 14 to 8

For this project we will make some naive assumptions

The market is not random
The price of BTC on the previous day and previous night matter for the next day

In [135]:
df_day = df[(df.index.time >= datetime.time(9))
   & (df.index.time <= datetime.time(16))]

In [136]:
df_night = df[(df.index.time <= datetime.time(8)) |
              (df.index.time >= datetime.time(17))]

In [31]:
df_night = df[~df.index.isin(df_day.index)]

In [137]:
len(df_day), len(df_night), len(df)

(15015, 29985, 45000)

In [138]:
df_day["date"] = pd.to_datetime(df_day.index.date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_day["date"] = pd.to_datetime(df_day.index.date)


In [139]:
df_night["date"] = pd.to_datetime(df_night.index.date)

df_night["date"] = np.where(df_night.index.time < datetime.time(9),
                                 df_night.index - pd.to_timedelta(1, unit="d"),
                                 df_night.date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_night["date"] = pd.to_datetime(df_night.index.date)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_night["date"] = np.where(df_night.index.time < datetime.time(9),


In [140]:
df_night.index = df_night.date

In [145]:
ohlc_dict = {
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
    "volume": "sum"}

day_sum = df_day.resample('1d').apply(ohlc_dict).dropna()
night_sum = df_night.resample('1d').apply(ohlc_dict).dropna()

In [142]:
#night_sum = night_sum.shift().dropna()

In [146]:
day_night = day_sum.merge(night_sum, left_index=True, right_index=True, suffixes=('_day', '_night'))

In [147]:
day_night

Unnamed: 0,open_day,high_day,low_day,close_day,volume_day,open_night,high_night,low_night,close_night,volume_night
2017-08-17,4411.00,4485.39,4200.74,4346.74,425.614791,4346.74,4371.52,4134.61,4331.71,936.635599
2017-08-18,4302.97,4318.16,3981.13,4021.11,374.465434,4015.40,4184.69,3850.00,4095.00,410.331225
2017-08-19,4095.00,4103.92,3953.40,4075.98,54.197220,4075.98,4211.08,4032.62,4108.47,80.363966
2017-08-20,4125.00,4185.94,4063.35,4140.93,249.780578,4140.93,4171.62,3953.40,4000.00,672.070734
2017-08-21,4000.00,4042.11,3911.79,4025.04,93.860645,4025.04,4070.49,3400.00,3934.01,648.538834
...,...,...,...,...,...,...,...,...,...,...
2022-09-26,19095.35,19280.00,18967.59,19113.86,158965.193890,19114.59,20385.86,19070.05,20270.88,328507.595800
2022-09-27,20270.88,20346.75,18816.32,19073.46,252251.253370,19073.46,19238.28,18471.28,19072.62,329232.973480
2022-09-28,19072.62,19666.66,19012.00,19568.23,200169.659160,19569.61,19790.00,19127.81,19211.63,241818.837280
2022-09-29,19211.63,19645.52,18843.01,19505.64,171071.912750,19507.09,19700.00,19155.36,19297.19,229305.459690


In [148]:
features = ["high_day", "low_day", "high_night", "low_night"]
categorical_features = []
numerical_features = []
for i in features:
    day_night[f"p{i}_touch"] = np.where(
        (day_night[i].shift() <= day_night["high_day"]) & (day_night[i].shift() >= day_night["low_day"]), 1, 0)
        
    day_night[f"ret_distance_p{i}_open"] = (day_night["open_day"] - day_night[i])/day_night[i]
    categorical_features.append(f"p{i}_touch")
    numerical_features.append(f"ret_distance_p{i}_open")

In [117]:
categorical_features + numerical_features

['phigh_day_touch',
 'plow_day_touch',
 'phigh_night_touch',
 'plow_night_touch',
 'ret_distance_phigh_day_open',
 'ret_distance_plow_day_open',
 'ret_distance_phigh_night_open',
 'ret_distance_plow_night_open']

In [149]:
export_data = day_night[categorical_features + numerical_features]
export_data.to_csv("BTC_feature_data.csv")