<h2>Installing dependencies</h2>

In [None]:
!pip install ohlcformer

In [None]:
import random
import torch
from pathlib import Path
from ohlcformer.data import InputFeatures, convert_to_tensor_dataset
from ohlcformer.data.utils import convert_stock_data, read_stocks_dir, split_time_series, convert_series_to_relative

In [None]:
!git clone https://github.com/niksyromyatnikov/OHLCFormer ohlcformer

In [None]:
# All locations are resolved against the current working directory, which is
# where the repository is cloned into the `ohlcformer` folder.
path = Path.cwd()
data_path = path.joinpath('ohlcformer', 'examples', 'data')

# Input directory for convert_stock_data, its output directory, and the
# target file of the final torch.save call.
stock_data_path = data_path.joinpath('train')
preprocessed_stock_data_path = data_path.joinpath('train-preprocessed')
tensor_dataset_path = data_path.joinpath('train.pt')

In [None]:
# Logging and reproducibility
verbose = True  # forwarded to convert_stock_data and read_stocks_dir
seed = 42       # fed to random.seed

# Arguments handed to convert_to_tensor_dataset
max_seq_len, prediction_len = 2000, 5
mask_proba = 0.2
perform_masking = True

In [None]:
# Seed every random number generator used by this notebook's libraries.
# `random` drives the shuffle further down; torch is seeded as well so that any
# torch-based randomness during dataset creation is reproducible.
# NOTE(review): confirm which generator convert_to_tensor_dataset uses for masking.
random.seed(seed)
torch.manual_seed(seed);  # trailing ';' hides the returned Generator repr in the cell output

<h2>Assembling stock folders with OHLC data into separate files and loading them</h2>

In [None]:
# Assemble the per-stock folders found under stock_data_path into separate files
# (per the section heading), using preprocessed_stock_data_path as the second
# argument; `verbose` is passed through as the third argument.
convert_stock_data(stock_data_path, preprocessed_stock_data_path, verbose)

In [None]:
# Load the preprocessed stock files; `verbose` is forwarded as the second argument.
stocks = read_stocks_dir(preprocessed_stock_data_path, verbose)

In [None]:
# Split the loaded series using the library defaults (no extra arguments are passed).
# NOTE(review): the split criterion lives inside split_time_series — confirm it there.
stocks_splitted = split_time_series(stocks)

In [None]:
# Sanity check: number of items returned by split_time_series.
len(stocks_splitted)

In [None]:
# Display the first split item to inspect its structure.
stocks_splitted[0]

<h2>Converting absolute time-step values to relative ones (increase/decrease relative to the previous step)</h2>

In [None]:
# Convert the split series from absolute to relative values (see the section heading);
# the result is bound to a new name, `dataset`.
dataset = convert_series_to_relative(stocks_splitted)

In [None]:
# Sanity check: number of items returned by convert_series_to_relative.
len(dataset)

In [None]:
# Display the first converted item to inspect its structure.
dataset[0]

<h2>Creating the tensor dataset and performing token masking (if enabled)</h2>

In [None]:
# Shuffle the samples in place using the `random` module seeded earlier.
# NOTE: this mutates `dataset`, so re-running only this cell reshuffles an already
# shuffled sequence and gives a different order than a fresh Restart & Run All.
random.shuffle(dataset)

In [None]:
# Build the tensor dataset from the shuffled samples, passing the settings from the
# configuration cell positionally: max_seq_len, mask_proba, prediction_len, perform_masking.
# NOTE(review): masking is presumably applied only when perform_masking is True — confirm in the library.
tensor_dataset = convert_to_tensor_dataset(dataset, max_seq_len, mask_proba, prediction_len, perform_masking)

In [None]:
# Sanity check: number of entries in the resulting tensor dataset.
len(tensor_dataset)

In [None]:
# Display the first entry of the tensor dataset to inspect its structure.
tensor_dataset[0]

In [None]:
# Persist the tensor dataset to tensor_dataset_path
# (examples/data/train.pt inside the cloned `ohlcformer` checkout).
torch.save(tensor_dataset, tensor_dataset_path)