In [2]:
from tqdm.notebook import tqdm
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from scipy.signal import correlate
from statsmodels.graphics.tsaplots import plot_acf
from torch.utils.data import TensorDataset, DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from nn_alpgotrading.data_processing import (
    load_data, 
    good_tickers_chooser, 
    DataResampler, 
    pct_calculator, 
    columns_renamer, 
    get_training_columns, 
    prepare_data, 
    generate_windows
)
import warnings
warnings.filterwarnings("ignore")

# Device used for neural-network computation: CUDA when it is available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f'Using device: {device}')

Using device: cuda


In [4]:
# Configuration constants
PATH = 'data/binance/fut/hour'
# NOTE(review): pandas >= 2.2 deprecates the uppercase 'H' offset alias in favour of 'h';
# how DataResampler consumes this string is not visible here — confirm before changing.
RESAMPLE_INTERVAL = '4H'

# Step 1: load the raw data. load_data(PATH) yields nine objects, unpacked in this order
# (note that qvolume comes before bvolume here).
(
    close,
    open_,
    high,
    low,
    qvolume,
    bvolume,
    ntrades,
    takerbuybvolume,
    takerbuyqvolume,
) = load_data(PATH)

# Step 2: choose the tickers with few missing values; the choice is driven by `close` only.
good_tickers = good_tickers_chooser(close)

# Step 3: resample every series to RESAMPLE_INTERVAL for the selected tickers.
# Each source frame is paired with the aggregation name handed to the resampler;
# the calls run in the listed order and the results are rebound to the same names.
(
    close,
    open_,
    high,
    low,
    bvolume,
    qvolume,
    ntrades,
    takerbuybvolume,
    takerbuyqvolume,
) = (
    DataResampler.resample_data(frame, good_tickers, RESAMPLE_INTERVAL, how)
    for frame, how in (
        (close, 'last'),
        (open_, 'first'),
        (high, 'max'),
        (low, 'min'),
        (bvolume, 'sum'),
        (qvolume, 'sum'),
        (ntrades, 'sum'),
        (takerbuybvolume, 'sum'),
        (takerbuyqvolume, 'sum'),
    )
)

# Step 4: prepare the columns.
# Returns are derived from `close` BEFORE `close` itself is renamed below.
returns = columns_renamer(pct_calculator(close, is_returns=True), "ret")

# Relabel every frame with its own tag via columns_renamer
# (presumably so columns stay distinguishable once combined — verify in columns_renamer).
(
    close,
    open_,
    high,
    low,
    bvolume,
    qvolume,
    ntrades,
    takerbuybvolume,
    takerbuyqvolume,
) = (
    columns_renamer(frame, tag)
    for frame, tag in (
        (close, "close"),
        (open_, "open"),
        (high, "high"),
        (low, "low"),
        (bvolume, "bvolume"),
        (qvolume, "qvolume"),
        (ntrades, "ntrades"),
        (takerbuybvolume, "takerbuybvolume"),
        (takerbuyqvolume, "takerbuyqvolume"),
    )
)

# Step 5: build the full DataFrame from the nine renamed frames plus the returns.
full_data = prepare_data(
    close,
    open_,
    high,
    low,
    bvolume,
    qvolume,
    ntrades,
    takerbuybvolume,
    takerbuyqvolume,
    returns,
)

# Step 6: select the columns used for training.
# Collect the column labels of each renamed frame as plain Python lists.
(
    close_col,
    open_col,
    high_col,
    low_col,
    bvolume_col,
    qvolume_col,
    ntrades_col,
    takerbuybvolume_col,
    takerbuyqvolume_col,
) = (
    frame.columns.tolist()
    for frame in (
        close,
        open_,
        high,
        low,
        bvolume,
        qvolume,
        ntrades,
        takerbuybvolume,
        takerbuyqvolume,
    )
)

# The returns columns are not passed here; only the nine feature groups are.
train_columns = get_training_columns(
    close_col,
    open_col,
    high_col,
    low_col,
    bvolume_col,
    qvolume_col,
    ntrades_col,
    takerbuybvolume_col,
    takerbuyqvolume_col,
)

# Step 7: restrict the full frame to the training columns.
# NOTE(review): `training_data` is not used by the windowing call below, which receives
# `full_data` instead — confirm that this is intended.
training_data = full_data[train_columns]

# Step 8: generate windowed samples; the names of the returns columns are passed
# as the second argument (presumably the prediction targets — verify in generate_windows).
window_size = 100
all_x, all_y = generate_windows(
    full_data,
    returns.columns.tolist(),
    window_size,
)

# Report the shapes of the generated arrays.
print(f"Размер данных X: {all_x.shape}")
print(f"Размер данных Y: {all_y.shape}")

Размер данных X: (7452, 100, 630)
Размер данных Y: (7452, 63)
