#### Простая модель на скользящем окне, которая постоянно выдаёт прямую линию между двумя предыдущими значениями

In [1]:
# Shell command run from the notebook: enables the Jupyter widgets notebook
# extension (the recorded output reports "jupyter-js-widgets/extension").
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [2]:
import numpy as np
import pandas as pd
from datetime import datetime
from dataclasses import dataclass

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import RichProgressBar

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score

import plotly.graph_objects as go
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from tqdm.notebook import tqdm, tnrange

%matplotlib inline

In [3]:
@dataclass
class Config:
    """Hyper-parameters and paths shared by the notebook cells.

    Every attribute is an annotated dataclass field, so a value can be
    overridden per instance (e.g. ``Config(num_epoch=10)``) while
    ``Config()`` keeps the defaults below.
    """

    # data
    symbol: str = "LTC/USDT"
    # NOTE(review): the "С" in the file name looks like Cyrillic U+0421 rather
    # than Latin "C" — confirm it matches the file on disk.
    path: str = "/home/naer/work/buttomCash/src/data/BTС-Minute.csv"
    # Points per sliding window; the last point of a window is the target.
    window_size: int = 6 * 30 * 2 + 1
    train_split_size: float = 0.9

    # model
    # Feature count per sample. Evaluated once from the default window_size
    # above, so pass input_size explicitly when overriding window_size.
    input_size: int = window_size - 1
    num_lstm_layers: int = 4
    hidden_size: int = 128
    dropout: float = 0.0
    num_classes: int = 2

    # training
    # Plain string; the previous value ("cpu",) was an accidental 1-tuple.
    device: str = "cpu"  # "cuda" or "cpu"
    batch_size: int = 30
    num_epoch: int = 3
    learning_rate: float = 3e-4
    scheduler_step_size: int = 100

    # backtesting
    init_margin: int = 50
    imr: int = 20


config = Config()

In [4]:
def load_data(config, data_range=None):
    """Read the price CSV, sort it by date and down-sample dates and closes.

    Args:
        config: Object whose ``path`` attribute names a CSV file containing
            ``date`` and ``close`` columns.
        data_range: Start of the row slice applied right after reading (and
            before sorting); ``None`` keeps every row.

    Returns:
        Tuple ``(data_date, data_close_price, data)``: the list of every
        240th date, a NumPy array with the matching close prices, and the
        complete sorted frame indexed by ``date``.
    """
    step = 60 * 4  # keep one row out of every 240

    frame = pd.read_csv(config.path)[data_range:].sort_values(by="date")

    data_date = frame["date"].to_list()[::step]
    data_close_price = np.array(frame["close"].to_list()[::step])
    frame = frame.set_index("date")

    num_data_points = len(data_date)
    display_date_range = " ".join(("from", data_date[0], "to", data_date[-1]))
    print("Number data points:", num_data_points, display_date_range)

    return data_date, data_close_price, frame


# Read the CSV named by config.path once; data_close_price feeds the window
# builder below, data_date and data are kept for later cells.
data_date, data_close_price, data = load_data(config)

Number data points: 11148 from 2017-01-01 00:01:00 to 2022-03-01 03:22:00


In [5]:
def prepare_data(x, window_size):
    """Cut a 1-D series into sliding windows and build features and labels.

    Every window holds ``window_size`` consecutive values. The first
    ``window_size - 1`` values are standardised with a ``StandardScaler``
    fitted on that window alone and form one feature row. The last value is
    the target, compared with the value immediately before it.

    Args:
        x: 1-D NumPy array of prices.
        window_size: Number of consecutive points per window, target included.

    Returns:
        Tuple ``(X, y_delta, y_last_new, y_binary)`` of float64 arrays with
        one row per window:

        * ``X`` — ``(n, window_size - 1)`` standardised features;
        * ``y_delta`` — ``(n, 1)`` percentage change from the previous value
          to the target, rounded to one decimal;
        * ``y_last_new`` — ``(n, 2)`` raw ``[previous, target]`` values;
        * ``y_binary`` — ``(n, 2)`` one-hot label, ``[1, 0]`` when the rounded
          change is positive and ``[0, 1]`` otherwise.

    Raises:
        ValueError: If ``x`` is too short to form a window shape.
    """
    n_row = x.shape[0] - window_size + 1
    if n_row < 0:
        raise ValueError(
            f"series of length {x.shape[0]} is too short for "
            f"window_size={window_size}"
        )

    # Overlapping windows as a zero-copy view; rows are only read below.
    windows = np.lib.stride_tricks.as_strided(
        x, shape=(n_row, window_size), strides=(x.strides[0], x.strides[0])
    )

    # Labels depend only on the last two columns, so build them in one shot
    # instead of growing arrays with np.concatenate on every iteration.
    last_price = windows[:, -2]
    new_price = windows[:, -1]
    y_last_new = np.column_stack((last_price, new_price)).astype(np.float64)

    delta = np.round((new_price / last_price - 1) * 100, 1)
    y_delta = delta.astype(np.float64).reshape(-1, 1)

    # The sign is taken AFTER rounding, so moves that round to 0.0 fall into
    # the "not positive" class.
    is_positive = delta > 0
    y_binary = np.column_stack((is_positive, ~is_positive)).astype(np.float64)

    # Features: each window is standardised independently, so the scaler is
    # refitted per row; results are written into a preallocated matrix.
    scaler = StandardScaler()
    X = np.empty((n_row, window_size - 1))
    for i, window in enumerate(tqdm(windows)):
        X[i] = scaler.fit_transform(window[:-1].reshape(-1, 1)).ravel()

    return X, y_delta, y_last_new, y_binary


def get_train_test_data(data_close_price, config):
    """Build windowed samples and split them chronologically.

    Args:
        data_close_price: 1-D array of close prices passed to
            ``prepare_data``.
        config: Object providing ``window_size`` and ``train_split_size``
            (fraction of samples that goes to the training part).

    Returns:
        Six arrays in this order: train features, train deltas, train one-hot
        labels, validation features, validation deltas, validation one-hot
        labels. The first ``int(n * train_split_size)`` samples are the
        training part and the remainder is the validation part.
    """
    features, deltas, _, labels = prepare_data(
        data_close_price, window_size=config.window_size
    )
    cut = int(deltas.shape[0] * config.train_split_size)

    # Same cut point for every array keeps rows aligned across the outputs.
    aligned = (features, deltas, labels)
    train_part = [arr[:cut] for arr in aligned]
    val_part = [arr[cut:] for arr in aligned]

    return (*train_part, *val_part)


# Build the windowed samples from the down-sampled close prices and keep the
# train/validation parts as notebook-level variables for the cells below.
(
    data_x_train,
    data_y_delta_train,
    data_y_binary_train,
    data_x_val,
    data_y_delta_val,
    data_y_binary_val,
) = get_train_test_data(data_close_price, config)

  0%|          | 0/10788 [00:00<?, ?it/s]

In [6]:
class TimeSeriesDataset(Dataset):
    """In-memory dataset of ``(features, label)`` pairs stored as float32.

    Args:
        x: 2-D NumPy array of shape ``(n_samples, n_features)``.
        y: NumPy array of labels whose first axis has ``n_samples`` entries.

    Attributes:
        x: float32 array of shape ``(n_samples, 1, n_features)``.
        y: float32 copy of the labels.
    """

    def __init__(self, x, y):
        # Insert a length-1 middle axis: (n_samples, n_features) ->
        # (n_samples, 1, n_features). The width comes from ``x`` itself, so
        # the dataset no longer depends on a global config object.
        n_samples, n_features = x.shape
        self.x = x.astype(np.float32).reshape(n_samples, 1, n_features)
        self.y = y.astype(np.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return (self.x[idx], self.y[idx])


# Wrap the splits for the DataLoaders; the targets are the one-hot
# up / not-up labels (data_y_binary_*), not the percentage deltas.
dataset_train = TimeSeriesDataset(data_x_train, data_y_binary_train)
dataset_val = TimeSeriesDataset(data_x_val, data_y_binary_val)

# Sanity check of the stored array shapes.
print("Train data shape", dataset_train.x.shape, dataset_train.y.shape)
print("Validation data shape", dataset_val.x.shape, dataset_val.y.shape)

Train data shape (9709, 1, 360) (9709, 2)
Validation data shape (1079, 1, 360) (1079, 2)


In [7]:
class RNN_GRU(LightningModule):
    """GRU followed by a linear layer, trained as a classifier.

    The network reads batches shaped ``(batch, seq_len, config.input_size)``
    (``batch_first=True``) and returns ``config.num_classes`` logits per
    sample, taken from the GRU output at the last time step. Targets are
    expected as one row of class scores per sample (e.g. one-hot), because
    both the loss and the metrics take ``argmax``/probabilities over dim 1.

    Args:
        config: Object providing ``input_size``, ``hidden_size``,
            ``num_lstm_layers``, ``num_classes``, ``learning_rate`` and
            ``batch_size``.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config

        self.gru = nn.GRU(
            self.config.input_size,
            self.config.hidden_size,
            self.config.num_lstm_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(self.config.hidden_size, self.config.num_classes)

        self.loss_function = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        # No explicit h0: nn.GRU starts from a zero hidden state created on
        # the input's device and dtype. A hand-built torch.zeros(...) lives
        # on the CPU and breaks as soon as the batch is on an accelerator.
        out, _ = self.gru(x)
        return self.fc(out[:, -1, :])

    def predict(self, x):
        """Return the arg-max index of the logits as a Python int.

        Switches the module to eval mode (and leaves it there). The arg-max
        is taken over the flattened logits, so the result is a class index
        only when ``x`` holds a single sample.
        """
        self.eval()
        with torch.no_grad():  # inference only, no autograd graph needed
            out = self(x)
        return int(torch.argmax(out))

    def metric(self, pred, target):
        """Return ``(accuracy, macro_f1)`` for a batch.

        Args:
            pred: Logits of shape ``(batch, num_classes)``.
            target: Per-class target scores of the same shape.
        """
        pred_cls = torch.argmax(pred, dim=1)
        target_cls = torch.argmax(target, dim=1)
        acc = (pred_cls == target_cls).float().mean().item()
        # scikit-learn works on host arrays, so move the class indices to
        # the CPU first; passing accelerator tensors directly would fail.
        f1 = float(
            f1_score(
                target_cls.cpu().numpy(),
                pred_cls.cpu().numpy(),
                average="macro",
            )
        )
        return acc, f1

    def _shared_step(self, batch, stage):
        """Compute the loss, log ``{stage}_loss/_acc/_f1`` and return the loss."""
        x, y = batch
        y_hat = self(x)
        loss = self.loss_function(y_hat, y)
        acc, f1 = self.metric(y_hat, y)
        self.log(f"{stage}_loss", loss.item(), prog_bar=True)
        self.log(f"{stage}_acc", acc, prog_bar=True)
        self.log(f"{stage}_f1", f1, prog_bar=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Run one optimisation step's forward pass and return its loss."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Log validation loss and metrics for one batch; returns nothing."""
        self._shared_step(batch, "val")

    def configure_optimizers(self):
        """Return an Adam optimiser using ``config.learning_rate``."""
        return optim.Adam(self.parameters(), lr=self.config.learning_rate)

    def train_dataloader(self):
        """Return a shuffled loader over the notebook-level ``dataset_train``."""
        return DataLoader(
            dataset_train,
            batch_size=self.config.batch_size,
            shuffle=True,
            num_workers=6,
        )

    def val_dataloader(self):
        """Return an ordered loader over the notebook-level ``dataset_val``."""
        return DataLoader(
            dataset_val,
            batch_size=self.config.batch_size,
            shuffle=False,
            num_workers=6,
        )


# Instantiate the network with the shared notebook configuration.
model = RNN_GRU(config)

In [14]:
# Train for config.num_epoch epochs. No dataloaders are passed to fit(), so
# the ones defined on the model (train_dataloader / val_dataloader) are used.
trainer = Trainer(
    max_epochs=config.num_epoch,
    accelerator="auto",
    # devices= 1 if torch.cuda.is_available() else None,
    callbacks=[RichProgressBar()],
)
trainer.fit(model)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Output()

`Trainer.fit` stopped: `max_epochs=3` reached.


In [15]:
# Run one pass over the model's validation dataloader and show the logged
# val_loss / val_acc / val_f1 values.
trainer.validate(model)

Output()

[{'val_loss': 0.6949372291564941,
  'val_acc': 0.5097312331199646,
  'val_f1': 0.37042734026908875}]