In [1]:
import os

os.chdir("../")

from dotenv import load_dotenv

load_dotenv(".env")

True

In [2]:
import black
import jupyter_black

jupyter_black.load(
    lab=False,
    line_length=79,
    verbosity="INFO",
    target_version=black.TargetVersion.PY310,
)

<IPython.core.display.Javascript object>

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%load_ext autoreload
%autoreload 2

TEXT_COLOR = "#313131"
# Plotly colors
LINE_COLORS = [
    "#636EFA",
    "#EF553B",
    "#00CC96",
    "#AB63FA",
    "#FFA15A",
    "#19D3F3",
    "#FF6692",
    "#B6E880",
    "#FF97FF",
    "#FECB52",
]

sns.set(
    style="darkgrid",
    rc={
        "figure.figsize": (6, 4),
        "figure.dpi": 100,
        "figure.facecolor": "w",
        "legend.facecolor": "w",
        "text.color": TEXT_COLOR,
        "font.family": "Microsoft Sans Serif",
        "axes.labelcolor": TEXT_COLOR,
        "xtick.color": TEXT_COLOR,
        "ytick.color": TEXT_COLOR,
    },
)

sns.set_palette(sns.color_palette(LINE_COLORS))

In [4]:
from src.dataloaders import CandleStickDataLoader, DataLoader
from clients.binance import BinanceClient
from clients import ENV
import constants

from binance import enums

In [5]:
# dl = DataLoader()

In [328]:
dl = CandleStickDataLoader(
    interval=enums.KLINE_INTERVAL_1MINUTE,
    assets=constants.PREDICTOR_ASSETS + [constants.ASSET_TO_TRADE],
    fiat=constants.FIAT_TO_TRADE,
)

In [330]:
start = dl.date_to_timestamp("2023-01-10 00:00:00")
end = dl.date_to_timestamp("2023-01-15 00:00:00")

In [331]:
data = dl.load(start, end)

In [None]:
pd.read_parquet

# Check if resampled data matches 

In [308]:
data["time"] = pd.to_datetime(data["Open time"].apply(dl.timestamp_to_date))

In [313]:
data = data.reset_index()

In [316]:
def resample_klines_df(df: pd.DataFrame, time_col: str, dst_freq: str):
    kline_cols_and_funcs = {
        "Open time": "first",
        "Close time": "last",
        "Open": "first",
        "High": "max",
        "Low": "min",
        "Close": "last",
        "Volume": "sum",
        "Quote asset volume": "sum",
        "Number of trades": "sum",
        "Taker buy base asset volume": "sum",
        "Taker buy quote asset volume": "sum",
    }
    cols_and_aggfuncs = {
        col: func
        for colend, func in kline_cols_and_funcs.items()
        for col in cols[cols.str.endswith(colend)].tolist()
    }
    resampled_df = (
        df.set_index(time_col).resample(dst_freq).agg(cols_and_aggfuncs)
    )
    return resampled_df.reset_index()

In [327]:
resample_klines_df(data, "time", "15T")

Unnamed: 0,time,Open time,Close time,BTCUSDT_Open,BTCUSDT_High,BTCUSDT_Low,BTCUSDT_Close,BTCUSDT_Volume,BTCUSDT_Quote asset volume,BTCUSDT_Number of trades,BTCUSDT_Taker buy base asset volume,BTCUSDT_Taker buy quote asset volume
0,2023-01-01 00:00:00,1672531200000,1672532099999,16541.77,16544.76,16520.00,16520.69,1172.53835,1.938455e+07,37484,553.35053,9.148050e+06
1,2023-01-01 00:15:00,1672532100000,1672532999999,16521.26,16545.70,16517.72,16544.19,1102.62888,1.822982e+07,33528,588.35947,9.727375e+06
2,2023-01-01 00:30:00,1672533000000,1672533899999,16544.19,16544.61,16508.39,16515.43,1365.65633,2.256529e+07,48518,664.46239,1.097913e+07
3,2023-01-01 00:45:00,1672533900000,1672534799999,16515.91,16536.84,16515.43,16529.67,724.01214,1.196664e+07,30324,373.77533,6.177796e+06
4,2023-01-01 01:00:00,1672534800000,1672535699999,16529.59,16541.80,16525.78,16538.21,977.24680,1.615868e+07,32200,458.02292,7.573419e+06
...,...,...,...,...,...,...,...,...,...,...,...,...
859,2023-01-09 22:45:00,1673304300000,1673305199999,17214.13,17214.57,17190.27,17204.83,967.99416,1.665260e+07,25613,469.60071,8.078764e+06
860,2023-01-09 23:00:00,1673305200000,1673306099999,17204.83,17205.57,17128.00,17165.43,3432.49270,5.892390e+07,67860,1554.77216,2.669103e+07
861,2023-01-09 23:15:00,1673306100000,1673306999999,17165.75,17182.24,17156.77,17171.08,1345.33976,2.310440e+07,36359,675.45487,1.160001e+07
862,2023-01-09 23:30:00,1673307000000,1673307899999,17171.08,17194.71,17169.90,17189.55,1403.25475,2.411439e+07,34123,744.44399,1.279315e+07


In [237]:
data["Open dt"] = data["Open time"].apply(dl.timestamp_to_date)
data["Close dt"] = data["Close time"].apply(dl.timestamp_to_date)

In [239]:
data.drop(["Open dt", "Close dt"], axis=1)

Unnamed: 0,Open time,BTCUSDT_Open,BTCUSDT_High,BTCUSDT_Low,BTCUSDT_Close,BTCUSDT_Volume,Close time,BTCUSDT_Quote asset volume,BTCUSDT_Number of trades,BTCUSDT_Taker buy base asset volume,...,XRPUSDT_Taker buy quote asset volume_y,ETHUSDT_Open,ETHUSDT_High,ETHUSDT_Low,ETHUSDT_Close,ETHUSDT_Volume,ETHUSDT_Quote asset volume,ETHUSDT_Number of trades,ETHUSDT_Taker buy base asset volume,ETHUSDT_Taker buy quote asset volume
0,1672574400000,16556.66,16572.94,16548.58,16560.93,4150.48564,1672577999999,68733340.0,146800,2064.92986,...,673059.6252,1196.35,1198.5,1195.83,1197.17,4418.7365,5289956.0,9228,2360.9452,2826257.0
1,1672578000000,16561.29,16567.26,16539.05,16548.04,4379.68203,1672581599999,72483200.0,151206,2132.25783,...,862339.408,1197.16,1197.29,1193.54,1195.26,4804.449,5742834.0,10193,1915.1418,2289291.0
2,1672581600000,16547.61,16558.56,16540.85,16542.94,3294.99093,1672585199999,54535020.0,110412,1660.99212,...,378712.9364,1195.26,1195.86,1194.2,1194.5,3496.2584,4178624.0,8320,1650.5311,1972653.0
3,1672585200000,16542.94,16559.99,16533.68,16558.73,3220.93236,1672588799999,53292960.0,112258,1657.51542,...,977973.5827,1194.5,1196.81,1194.34,1196.81,2771.4134,3312992.0,7903,1432.0601,1711801.0
4,1672588800000,16558.73,16584.66,16558.0,16565.14,4962.13235,1672592399999,82217240.0,152217,2514.82348,...,513269.7587,1196.81,1198.35,1195.88,1197.34,8667.6825,10377660.0,11497,5146.765,6161954.0
5,1672592400000,16565.13,16596.64,16562.51,16578.85,4714.40354,1672595999999,78161010.0,151893,2395.66351,...,620530.7917,1197.35,1203.5,1197.34,1200.68,10429.0438,12522360.0,13059,5530.1178,6638510.0
6,1672596000000,16579.25,16603.89,16578.85,16591.88,4146.37245,1672599599999,68800700.0,141406,2156.10882,...,686807.867,1200.67,1202.91,1200.67,1200.92,5269.1583,6331625.0,9936,2781.0277,3341721.0
7,1672599600000,16591.35,16623.65,16591.34,16603.08,4709.74023,1672603199999,78223010.0,148980,2370.80934,...,748876.8103,1200.93,1204.51,1200.92,1201.78,6650.6305,7999635.0,11595,3761.8844,4524603.0
8,1672603200000,16603.53,16628.0,16600.55,16604.51,4189.05318,1672606799999,69590380.0,147849,2126.6117,...,675793.4339,1201.77,1204.67,1201.39,1201.4,4423.7644,5320571.0,9276,2152.6807,2589032.0
9,1672606800000,16604.51,16608.83,16592.63,16606.68,3656.77804,1672610399999,60709000.0,136504,1858.75074,...,367967.2369,1201.4,1203.08,1200.65,1202.85,3422.5871,4113624.0,8856,2137.2872,2568848.0


In [209]:
pd.to_parquet()

AttributeError: module 'pandas' has no attribute 'to_parquet'

In [175]:
dl.timestamp_to_date()

TypeError: cannot convert the series to <class 'int'>

In [168]:
dl.data

Unnamed: 0,Open time,BTCUSDT_Open,BTCUSDT_High,BTCUSDT_Low,BTCUSDT_Close,BTCUSDT_Volume,Close time,BTCUSDT_Quote asset volume,BTCUSDT_Number of trades,BTCUSDT_Taker buy base asset volume,...,XRPUSDT_Taker buy quote asset volume_y,ETHUSDT_Open,ETHUSDT_High,ETHUSDT_Low,ETHUSDT_Close,ETHUSDT_Volume,ETHUSDT_Quote asset volume,ETHUSDT_Number of trades,ETHUSDT_Taker buy base asset volume,ETHUSDT_Taker buy quote asset volume
0,1672531200000,16541.77,16545.7,16508.39,16529.67,4364.8357,1672534799999,72146290.0,149854,2179.94772,...,2193628.0,1196.13,1196.7,1192.72,1194.09,5889.384,7036230.0,15832,2550.8294,3046972.0
1,1672534800000,16529.59,16556.8,16525.78,16551.47,3590.06669,1672538399999,59376760.0,126556,1730.24901,...,947858.0,1194.09,1196.37,1193.84,1196.02,3157.2079,3773475.0,8245,1583.8715,1892940.0
2,1672538400000,16551.47,16559.77,16538.14,16548.19,3318.84038,1672541999999,54919450.0,115398,1611.12302,...,939966.1,1196.01,1196.74,1194.11,1195.4,3752.0476,4484632.0,8377,1771.7331,2117552.0
3,1672542000000,16548.19,16548.19,16518.21,16533.04,4242.0805,1672545599999,70122540.0,137724,2096.09287,...,546212.5,1195.41,1195.41,1191.71,1194.04,7493.4207,8941607.0,10244,3409.5492,4068126.0
4,1672545600000,16533.04,16535.97,16511.92,16521.85,4285.00909,1672549199999,70802640.0,129535,2188.40175,...,837849.3,1194.05,1194.05,1190.57,1192.92,6409.2491,7641847.0,10211,2832.3239,3377225.0
5,1672549200000,16522.22,16534.98,16512.3,16530.71,3717.37178,1672552799999,61412220.0,118541,1925.65583,...,1169391.0,1192.92,1194.67,1192.71,1194.54,2316.3448,2765081.0,7332,1203.6576,1436771.0
6,1672552800000,16530.71,16550.0,16528.81,16539.79,3344.90206,1672556399999,55329560.0,106074,1625.16179,...,522341.2,1194.55,1195.23,1193.81,1194.81,4102.0957,4899784.0,12437,2464.4186,2943527.0
7,1672556400000,16539.79,16544.57,16499.01,16526.19,5184.95822,1672559999999,85663590.0,148545,2476.59015,...,669796.7,1194.81,1195.75,1193.04,1195.06,5826.7698,6960068.0,10202,2978.3992,3557578.0
8,1672560000000,16525.7,16534.03,16508.45,16513.34,4271.92023,1672563599999,70568380.0,135605,2092.4027,...,570902.7,1195.07,1195.9,1194.17,1194.28,3124.6022,3734150.0,7964,1508.1465,1802224.0
9,1672563600000,16513.33,16542.49,16505.2,16537.88,4100.75049,1672567199999,67754200.0,134070,2103.34139,...,728177.9,1194.27,1196.55,1193.98,1196.48,2763.9478,3303314.0,8452,1498.1619,1790407.0


In [124]:
d = np.array(data)[:, :-1]

In [141]:
df = pd.DataFrame(data, columns=dl.dtypes.keys())
df = df.astype(dl.dtypes)

In [142]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Open time                     24 non-null     int64  
 1   Open                          24 non-null     float64
 2   High                          24 non-null     float64
 3   Low                           24 non-null     float64
 4   Close                         24 non-null     float64
 5   Volume                        24 non-null     float64
 6   Close time                    24 non-null     int64  
 7   Quote asset volume            24 non-null     float64
 8   Number of trades              24 non-null     int64  
 9   Taker buy base asset volume   24 non-null     float64
 10  Taker buy quote asset volume  24 non-null     float64
 11  Ignore                        24 non-null     int64  
dtypes: float64(8), int64(4)
memory usage: 2.4 KB


In [129]:
d.reshape(11, 24)

array([['1672531200000', '16541.77000000', '16545.70000000',
        '16508.39000000', '16529.67000000', '4364.83570000',
        '1672534799999', '72146293.58672290', '149854', '2179.94772000',
        '36032352.87035720', '1672534800000', '16529.59000000',
        '16556.80000000', '16525.78000000', '16551.47000000',
        '3590.06669000', '1672538399999', '59376757.34460920', '126556',
        '1730.24901000', '28617416.00486430', '1672538400000',
        '16551.47000000'],
       ['16559.77000000', '16538.14000000', '16548.19000000',
        '3318.84038000', '1672541999999', '54919453.37562130', '115398',
        '1611.12302000', '26660867.86773650', '1672542000000',
        '16548.19000000', '16548.19000000', '16518.21000000',
        '16533.04000000', '4242.08050000', '1672545599999',
        '70122541.93745440', '137724', '2096.09287000',
        '34649044.48513040', '1672545600000', '16533.04000000',
        '16535.97000000', '16511.92000000'],
       ['16521.85000000', '4285

In [107]:
print(
    dl.timestamp_to_date(1672617600000),
    "|",
    dl.timestamp_to_date(1672703999999),
)

2023-01-02 00:00:00 | 2023-01-02 23:59:59


In [91]:
start = dl.date_to_timestamp("2023-01-01 00:00:00")
end = dl.date_to_timestamp("2023-01-07 00:00:00")

In [92]:
for s, e in dl.partition_timestamps_into_days(start, end):
    print(dl.timestamp_to_date(s), "|", dl.timestamp_to_date(e))

2023-01-01 00:00:00 | 2023-01-02 00:00:00
2023-01-02 00:00:00 | 2023-01-03 00:00:00
2023-01-03 00:00:00 | 2023-01-04 00:00:00
2023-01-04 00:00:00 | 2023-01-05 00:00:00
2023-01-05 00:00:00 | 2023-01-06 00:00:00
2023-01-06 00:00:00 | 2023-01-07 00:00:00


In [74]:
dl.timestamp_to_date(1672534799999)

'2023-01-01 00:59:59'

In [43]:
dl.timestamp_to_date(dl.now)

TypeError: int() argument must be a string, a bytes-like object or a number, not 'datetime.datetime'

In [10]:
dl.timestamp_to_date("D")