In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import GPT2Model, GPT2Config, PreTrainedModel
from datasets import load_dataset, Dataset, DatasetDict

from torch.utils.data import DataLoader
from transformers import Trainer, TrainingArguments

import math

In [2]:
class GPT2Trader(PreTrainedModel):
    """GPT-2 backbone that turns a sequence of 5-feature bars into soft trade sizes.

    Each input step is linearly embedded, layer-normalised and passed through a
    ``GPT2Model`` via ``inputs_embeds``; a linear head then emits 60 values per
    step which are squashed to ``[-1, 1]`` with ``tanh``.

    NOTE(review): the name ``ohlcv`` suggests open/high/low/close/volume features
    and the 60 outputs presumably correspond to 60 future horizons -- confirm
    against the dataset builder.
    """

    def __init__(self, config):
        """Build the model from a ``GPT2Config``.

        ``config`` is modified in place: ``n_layer`` is derived from ``n_embd``
        and ``initializer_range`` is set to ``1 / sqrt(n_embd)`` before the
        backbone is created.
        """
        super().__init__(config)
        
        # use levine 2020 layer numbers
        n_layer = round((math.log(config.n_embd) - 5.039) / 5.55e-2)
        n_layer = max(1, n_layer)
        print(f'Using {n_layer} layers')
        config.n_layer = n_layer
        
        config.initializer_range = 1 / math.sqrt(config.n_embd)
        
        self.embed = nn.Linear(5, config.n_embd, bias = False)
        self.norm = nn.LayerNorm(config.n_embd)
        self.gpt = GPT2Model(config)
        self.trade = nn.Linear(config.n_embd, 60, bias = False)
        
        # Not referenced by forward(); kept so the parameter set (and therefore
        # the state_dict keys of already-saved checkpoints) stays the same.
        self.trade_sign = nn.Parameter(torch.Tensor([1, -1]), requires_grad = False)
        
    def forward(self, ohlcv, future = None):
        """Compute soft trades and, when targets are given, the training loss.

        Args:
            ohlcv: float tensor whose last dimension is 5 (the input width of
                ``self.embed``).
            future: optional tensor with as many elements as the trade head
                produces for ``ohlcv`` (60 per step); it is flattened and
                multiplied element-wise with the flattened trades. When omitted
                the model runs in inference mode.

        Returns:
            ``{"loss": loss}`` when ``future`` is given (unchanged training
            behaviour), otherwise ``{"trades": soft_trade}`` where ``soft_trade``
            is the un-flattened tanh output of the trade head.
        """
        embed = self.norm(self.embed(ohlcv))
        hidden = self.gpt(inputs_embeds = embed).last_hidden_state
        
        # tanh bounds every position in [-1, 1]
        soft_trade = torch.tanh(self.trade(hidden))
        
        if future is None:
            # inference: no targets, so there is nothing to score
            return {"trades": soft_trade}
        
        soft_trade = soft_trade.reshape(-1)
        
        # sharpe information
        soft_profit = soft_trade * future.reshape(-1)
        
        # the exp is so that loss is purely positive and minimizes toward 0 (also losses have more loss than profit)
        # NOTE(review): exp(-profit) is unbounded for large negative profit, so a
        # few extreme `future` values can dominate the batch loss.
        loss_ppl = torch.exp(-soft_profit).mean()
        
        # penalty for big trades (to stop trading from happening with no profit)
        trade_penalty = soft_trade.abs().mean()
        
        loss = loss_ppl + .05 * trade_penalty # .05 since the .05 * std ~ commission size
        
        # Diagnostics only: keep them out of the autograd graph.
        with torch.no_grad():
            trades = soft_trade.abs().sum()
            print(f'sharpe {soft_profit.mean() / soft_profit.std():.4}, t-value {(soft_profit.mean() * trades**.5) / soft_profit.std():.4} '
                  f'trade % {trades * 100 / len(soft_profit):,.4}, big trade % {(soft_trade.abs() > .5).sum() * 100 / len(soft_profit):.4}, loss {loss_ppl.item():.4}')
        
        return {"loss": loss}

In [3]:
class TraderTrainer(Trainer):
    """``Trainer`` variant that feeds the training set in its stored order."""

    def get_train_dataloader(self) -> DataLoader:
        """
        Returns the training :class:`~torch.utils.data.DataLoader`.

        Unlike the base implementation, no sampler is attached and shuffling is
        disabled, so examples are visited in the order they appear in
        :obj:`self.train_dataset`. Batch size, collator, ``drop_last`` and worker
        count are taken from the trainer / training arguments.

        Raises:
            ValueError: if the trainer was created without a training dataset.
        """
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")

        return DataLoader(
            self.train_dataset,
            batch_size=self.args.train_batch_size,
            shuffle=False, # IMPORTANT TO STOP OVERFITTING
            collate_fn=self.data_collator,
            drop_last=self.args.dataloader_drop_last,
            num_workers=self.args.dataloader_num_workers,
        )

In [4]:
# Load the EUR/USD bars from the local parquet file.
eurusd = Dataset.from_parquet('data/EURUSD_day.pq')

# Unshuffled splits: hold out the last 10% of rows, then halve that
# hold-out into a validation part followed by a test part.
split = eurusd.train_test_split(test_size = .1, shuffle = False)
valid_test = split['test'].train_test_split(test_size = .5, shuffle = False)

eurusd = DatasetDict(
    train = split['train'],
    validation = valid_test['train'],
    test = valid_test['test'],
)

Using custom data configuration default-524475b7137ed866
Found cached dataset parquet (C:/Users/micha/.cache/huggingface/datasets/parquet/default-524475b7137ed866/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


In [5]:
training_args = TrainingArguments(
    # checkpoints go under this directory; no external experiment tracker
    output_dir = "./results",
    report_to = "none",

    # log and evaluate on the same 200-step cadence
    logging_strategy = "steps",
    logging_steps = 200,
    evaluation_strategy = "steps",
    eval_steps = 200,

    # single pass over the data with warmup followed by cosine decay
    num_train_epochs = 1,
    learning_rate = 5e-4,
    lr_scheduler_type = "cosine",
    warmup_ratio = .05,
    max_grad_norm = 1,

    # one example per optimisation / evaluation step
    per_device_train_batch_size = 1,
    per_device_eval_batch_size = 1,
)

In [6]:
# n_layer is not set here: GPT2Trader.__init__ derives it from n_embd.
config = GPT2Config(
    n_embd = 320,
    n_head = 5,
    vocab_size = 0,      # GPT2Trader feeds inputs_embeds, never token ids
    n_positions = 2000,
    # dropout (set all three to .0 to disable)
    resid_pdrop = .1,
    embd_pdrop = .1,
    attn_pdrop = .1,
    scale_attn_by_inverse_layer_idx = True,
    use_cache = False,
)

In [7]:
# Instantiate the model (prints the derived layer count) and move it to the GPU.
model = GPT2Trader(config)
model.cuda()

# Train on the first split, evaluate on the validation split.
trainer = TraderTrainer(
    args = training_args,
    model = model,
    eval_dataset = eurusd['validation'],
    train_dataset = eurusd['train'],
)

Using 13 layers


In [8]:
# Run the full training loop; the returned TrainOutput is shown as the cell result.
trainer.train()

***** Running training *****
  Num examples = 3098
  Num Epochs = 1
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 1
  Gradient Accumulation steps = 1
  Total optimization steps = 3098
Could not estimate the number of tokens of the input, floating-point operations will not be computed


sharpe 0.009729, t-value 1.747 trade % 37.31, big trade % 31.04, loss 1.86e+12


Step,Training Loss,Validation Loss
200,55319559950827.52,1.000676
400,11.9887,1.00172
600,1.3543,1.024451
800,1.051,0.999747
1000,1.0003,0.999269
1200,0.9995,0.999727
1400,1.0003,0.999136
1600,0.9995,0.999085
1800,1.0005,0.998532
2000,0.9991,0.997905


sharpe 0.007773, t-value 1.384 trade % 36.7, big trade % 30.09, loss 1.631
sharpe 0.0101, t-value 1.777 trade % 35.84, big trade % 28.63, loss 172.3
sharpe 0.005823, t-value 1.022 trade % 35.66, big trade % 28.47, loss 3.555
sharpe 0.005891, t-value 1.066 trade % 37.93, big trade % 32.25, loss 6.689e+11
sharpe 0.01624, t-value 2.815 trade % 34.76, big trade % 26.58, loss 1.394
sharpe 0.003126, t-value 0.5439 trade % 35.04, big trade % 27.45, loss 1.791e+05
sharpe 0.01285, t-value 2.177 trade % 33.2, big trade % 24.49, loss 1.295
sharpe 0.02582, t-value 4.311 trade % 32.28, big trade % 22.51, loss 1.583
sharpe 0.01642, t-value 2.698 trade % 31.26, big trade % 21.43, loss 1.605
sharpe 0.02375, t-value 3.782 trade % 29.34, big trade % 17.58, loss 77.4
sharpe 0.01212, t-value 1.981 trade % 30.9, big trade % 20.91, loss 18.08
sharpe 0.0428, t-value 7.009 trade % 31.04, big trade % 20.74, loss 2.786
sharpe 0.08478, t-value 13.66 trade % 30.04, big trade % 18.81, loss 1.146
sharpe 0.03782, t-

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.08673, t-value 5.759 trade % 5.104, big trade % 0.03588, loss 0.998
sharpe 0.06866, t-value 5.271 trade % 6.822, big trade % 0.006944, loss 0.9985
sharpe 0.1303, t-value 8.754 trade % 5.223, big trade % 0.04514, loss 0.9963
sharpe 0.02602, t-value 1.765 trade % 5.328, big trade % 0.02894, loss 0.9998
sharpe 0.104, t-value 7.168 trade % 5.496, big trade % 0.01157, loss 0.9978
sharpe 0.109, t-value 8.321 trade % 6.744, big trade % 0.1204, loss 0.9975
sharpe 0.1321, t-value 9.897 trade % 6.5, big trade % 0.04282, loss 0.995
sharpe 0.1587, t-value 11.22 trade % 5.792, big trade % 0.04514, loss 0.9963
sharpe 0.006493, t-value 0.4944 trade % 6.709, big trade % 0.06366, loss 1.002
sharpe 0.1012, t-value 7.143 trade % 5.766, big trade % 0.006944, loss 0.997
sharpe 0.008047, t-value 0.6487 trade % 7.522, big trade % 0.0, loss 1.001
sharpe 0.1561, t-value 11.43 trade % 6.2, big trade % 0.01852, loss 0.9962
sharpe 0.06363, t-value 4.269 trade % 5.209, big trade % 0.04977, loss 0.9977
sha

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1104, t-value 11.07 trade % 11.63, big trade % 0.0, loss 0.9952
sharpe 0.06069, t-value 4.404 trade % 6.095, big trade % 0.1007, loss 0.9986
sharpe 0.1166, t-value 11.08 trade % 10.46, big trade % 0.0, loss 0.9946
sharpe 0.02325, t-value 2.568 trade % 14.12, big trade % 0.0, loss 1.005
sharpe 0.1143, t-value 9.889 trade % 8.666, big trade % 0.0, loss 0.9967
sharpe 0.06801, t-value 5.45 trade % 7.434, big trade % 0.0, loss 0.9986
sharpe 0.195, t-value 15.72 trade % 7.525, big trade % 0.0, loss 0.9883
sharpe 0.09306, t-value 8.549 trade % 9.768, big trade % 0.0, loss 0.9971
sharpe -0.004561, t-value -0.3438 trade % 6.576, big trade % 0.0, loss 1.002
sharpe 0.07029, t-value 6.585 trade % 10.16, big trade % 0.0, loss 0.9974
sharpe -0.00706, t-value -0.437 trade % 4.434, big trade % 0.0, loss 1.001
sharpe 0.01921, t-value 1.605 trade % 8.079, big trade % 0.0, loss 0.9999
sharpe 0.05685, t-value 6.016 trade % 12.96, big trade % 0.0, loss 0.9987
sharpe 0.1274, t-value 13.34 trade % 1

Saving model checkpoint to ./results\checkpoint-500
Configuration saved in ./results\checkpoint-500\config.json
Model weights saved in ./results\checkpoint-500\pytorch_model.bin


sharpe 0.01118, t-value 0.8223 trade % 6.265, big trade % 0.0, loss 1.005
sharpe 0.01553, t-value 1.042 trade % 5.207, big trade % 0.0, loss 1.003
sharpe 0.02301, t-value 1.483 trade % 4.81, big trade % 0.0, loss 1.001
sharpe 0.06189, t-value 4.227 trade % 5.399, big trade % 0.0, loss 0.9981
sharpe 0.1079, t-value 7.787 trade % 6.024, big trade % 0.0, loss 0.9948
sharpe 0.05776, t-value 4.581 trade % 7.279, big trade % 0.0, loss 1.003
sharpe 0.0442, t-value 3.817 trade % 8.629, big trade % 0.0, loss 1.013
sharpe 0.2313, t-value 20.86 trade % 9.41, big trade % 0.0, loss 0.9809
sharpe 0.03991, t-value 4.117 trade % 12.31, big trade % 0.0, loss 1.005
sharpe 0.04825, t-value 5.182 trade % 13.35, big trade % 0.01273, loss 1.01
sharpe 0.03606, t-value 3.852 trade % 13.21, big trade % 0.5579, loss 1.009
sharpe 0.1775, t-value 17.59 trade % 11.36, big trade % 0.3252, loss 0.9852
sharpe 0.0502, t-value 4.821 trade % 10.67, big trade % 1.064, loss 1.006
sharpe 0.1496, t-value 13.52 trade % 9.456

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1326, t-value 14.85 trade % 14.53, big trade % 0.06481, loss 0.9935
sharpe 0.02631, t-value 3.099 trade % 16.06, big trade % 0.1076, loss 1.0
sharpe -0.005713, t-value -0.6267 trade % 13.92, big trade % 0.0625, loss 1.004
sharpe 0.0558, t-value 6.444 trade % 15.44, big trade % 0.003472, loss 1.066
sharpe -0.009378, t-value -1.069 trade % 15.03, big trade % 0.08449, loss 1.002
sharpe 0.03587, t-value 4.192 trade % 15.81, big trade % 0.05787, loss 0.9995
sharpe 0.2054, t-value 23.81 trade % 15.55, big trade % 0.01505, loss 0.9839
sharpe -0.004869, t-value -0.5648 trade % 15.57, big trade % 0.08333, loss 1.002
sharpe 0.008029, t-value 0.9278 trade % 15.46, big trade % 0.01852, loss 1.011
sharpe 0.1634, t-value 18.97 trade % 15.6, big trade % 0.1343, loss 0.9895
sharpe 0.02509, t-value 2.959 trade % 16.1, big trade % 0.03819, loss 1.002
sharpe -0.01362, t-value -1.567 trade % 15.32, big trade % 0.1192, loss 1.002
sharpe 0.0831, t-value 9.571 trade % 15.35, big trade % 0.009259, lo

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1226, t-value 7.919 trade % 4.83, big trade % 0.0, loss 0.9975
sharpe 0.09556, t-value 6.018 trade % 4.591, big trade % 0.0, loss 0.9986
sharpe 0.1772, t-value 10.76 trade % 4.265, big trade % 0.0, loss 0.9959
sharpe 0.02557, t-value 1.69 trade % 5.057, big trade % 0.0, loss 0.9999
sharpe 0.1712, t-value 10.99 trade % 4.765, big trade % 0.0, loss 0.9975
sharpe 0.1527, t-value 9.42 trade % 4.405, big trade % 0.0, loss 0.998
sharpe 0.1768, t-value 10.98 trade % 4.467, big trade % 0.0, loss 0.9952
sharpe 0.2017, t-value 12.35 trade % 4.342, big trade % 0.0, loss 0.9968
sharpe 0.05415, t-value 3.4 trade % 4.564, big trade % 0.0, loss 0.9983
sharpe 0.0841, t-value 5.396 trade % 4.764, big trade % 0.0, loss 0.9982
sharpe 0.04247, t-value 2.915 trade % 5.454, big trade % 0.0, loss 0.999
sharpe 0.1842, t-value 11.64 trade % 4.621, big trade % 0.0, loss 0.9972
sharpe 0.0651, t-value 4.224 trade % 4.872, big trade % 0.0, loss 0.9982
sharpe 0.1289, t-value 8.127 trade % 4.603, big trade 

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1599, t-value 8.199 trade % 3.045, big trade % 0.0, loss 0.9981
sharpe 0.04569, t-value 2.973 trade % 4.9, big trade % 0.0, loss 0.9994
sharpe 0.1034, t-value 5.175 trade % 2.901, big trade % 0.0, loss 0.9976
sharpe 0.1061, t-value 5.18 trade % 2.761, big trade % 0.0, loss 0.9964
sharpe 0.1705, t-value 10.12 trade % 4.08, big trade % 0.0, loss 0.9975
sharpe 0.0923, t-value 5.53 trade % 4.155, big trade % 0.0, loss 0.9987
sharpe 0.1767, t-value 9.832 trade % 3.584, big trade % 0.0, loss 0.9951
sharpe 0.08619, t-value 4.936 trade % 3.797, big trade % 0.0, loss 0.9986
sharpe 0.1158, t-value 6.353 trade % 3.482, big trade % 0.0, loss 0.9924
sharpe 0.1354, t-value 6.882 trade % 2.989, big trade % 0.0, loss 0.9973
sharpe 0.09475, t-value 5.878 trade % 4.455, big trade % 0.0, loss 0.9961
sharpe 0.1745, t-value 10.32 trade % 4.045, big trade % 0.0, loss 0.9971
sharpe 0.1301, t-value 6.347 trade % 2.756, big trade % 0.0, loss 0.9965
sharpe 0.179, t-value 8.937 trade % 2.886, big trade 

Saving model checkpoint to ./results\checkpoint-1000
Configuration saved in ./results\checkpoint-1000\config.json
Model weights saved in ./results\checkpoint-1000\pytorch_model.bin


sharpe 0.1043, t-value 6.818 trade % 4.947, big trade % 0.0, loss 0.996
sharpe 0.09315, t-value 5.974 trade % 4.76, big trade % 0.0, loss 0.9966
sharpe 0.07403, t-value 4.641 trade % 4.549, big trade % 0.0, loss 0.9983
sharpe 0.1229, t-value 7.938 trade % 4.832, big trade % 0.0, loss 0.9937
sharpe 0.0819, t-value 5.72 trade % 5.645, big trade % 0.0, loss 0.9983
sharpe 0.1283, t-value 8.461 trade % 5.031, big trade % 0.0, loss 0.9945
sharpe 0.09157, t-value 6.421 trade % 5.691, big trade % 0.0, loss 0.9972
sharpe 0.07028, t-value 5.745 trade % 7.735, big trade % 0.0, loss 0.9977
sharpe 0.0874, t-value 7.569 trade % 8.68, big trade % 0.0, loss 0.9956
sharpe 0.1688, t-value 12.16 trade % 6.005, big trade % 0.0, loss 0.9938
sharpe 0.1886, t-value 14.77 trade % 7.103, big trade % 0.0, loss 0.9928
sharpe 0.01853, t-value 1.728 trade % 10.07, big trade % 0.0, loss 1.011
sharpe 0.02863, t-value 2.365 trade % 7.902, big trade % 0.0, loss 1.0
sharpe 0.08451, t-value 6.028 trade % 5.889, big trad

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.0894, t-value 6.868 trade % 6.83, big trade % 0.0, loss 0.9978
sharpe 0.1334, t-value 10.07 trade % 6.588, big trade % 0.0, loss 0.9975
sharpe 0.1942, t-value 13.24 trade % 5.38, big trade % 0.0, loss 0.994
sharpe 0.02014, t-value 1.5 trade % 6.424, big trade % 0.0, loss 1.001
sharpe 0.1751, t-value 13.44 trade % 6.818, big trade % 0.0, loss 0.9965
sharpe 0.2064, t-value 14.39 trade % 5.625, big trade % 0.0, loss 0.9965
sharpe 0.1289, t-value 9.271 trade % 5.985, big trade % 0.0, loss 0.9957
sharpe 0.2484, t-value 17.55 trade % 5.778, big trade % 0.0, loss 0.9947
sharpe 0.06953, t-value 5.013 trade % 6.015, big trade % 0.0, loss 0.9972
sharpe 0.07739, t-value 5.778 trade % 6.452, big trade % 0.0, loss 0.9979
sharpe 0.04995, t-value 4.02 trade % 7.497, big trade % 0.0, loss 0.9986
sharpe 0.2868, t-value 20.87 trade % 6.129, big trade % 0.0, loss 0.9943
sharpe 0.05767, t-value 4.272 trade % 6.349, big trade % 0.0, loss 0.9983
sharpe 0.08216, t-value 5.895 trade % 5.959, big trad

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1203, t-value 7.274 trade % 4.234, big trade % 0.0, loss 0.9979
sharpe 0.09223, t-value 5.654 trade % 4.349, big trade % 0.0, loss 0.9988
sharpe 0.1717, t-value 8.531 trade % 2.858, big trade % 0.0, loss 0.9961
sharpe 0.048, t-value 2.741 trade % 3.774, big trade % 0.0, loss 0.9987
sharpe 0.174, t-value 10.64 trade % 4.327, big trade % 0.0, loss 0.9974
sharpe 0.155, t-value 8.778 trade % 3.71, big trade % 0.0, loss 0.9979
sharpe 0.1375, t-value 7.469 trade % 3.414, big trade % 0.0, loss 0.9963
sharpe 0.1543, t-value 9.062 trade % 3.993, big trade % 0.0, loss 0.9975
sharpe 0.101, t-value 5.487 trade % 3.414, big trade % 0.0, loss 0.9943
sharpe 0.0824, t-value 4.927 trade % 4.138, big trade % 0.0, loss 0.9981
sharpe 0.06977, t-value 4.415 trade % 4.635, big trade % 0.0, loss 0.9977
sharpe 0.2037, t-value 12.07 trade % 4.064, big trade % 0.0, loss 0.9967
sharpe 0.07915, t-value 4.592 trade % 3.896, big trade % 0.0, loss 0.9976
sharpe 0.1102, t-value 6.066 trade % 3.506, big trade

Saving model checkpoint to ./results\checkpoint-1500
Configuration saved in ./results\checkpoint-1500\config.json
Model weights saved in ./results\checkpoint-1500\pytorch_model.bin


sharpe 0.1192, t-value 11.26 trade % 10.34, big trade % 0.0, loss 0.9908
sharpe 0.08801, t-value 8.299 trade % 10.29, big trade % 0.0, loss 0.9973
sharpe 0.1131, t-value 10.58 trade % 10.12, big trade % 0.0, loss 0.9916
sharpe 0.1099, t-value 8.922 trade % 7.634, big trade % 0.0, loss 0.995
sharpe 0.08489, t-value 7.312 trade % 8.586, big trade % 0.0, loss 0.9962
sharpe 0.09765, t-value 7.787 trade % 7.359, big trade % 0.0, loss 0.9948
sharpe 0.1654, t-value 12.68 trade % 6.802, big trade % 0.0, loss 0.9879
sharpe 0.08029, t-value 5.359 trade % 5.156, big trade % 0.0, loss 0.9968
sharpe 0.07183, t-value 5.218 trade % 6.107, big trade % 0.0, loss 0.9978
sharpe 0.07703, t-value 5.281 trade % 5.44, big trade % 0.0, loss 0.9967
sharpe 0.03155, t-value 2.196 trade % 5.607, big trade % 0.0, loss 1.003
sharpe 0.01242, t-value 0.9439 trade % 6.681, big trade % 0.0, loss 1.003
sharpe 0.1144, t-value 8.293 trade % 6.086, big trade % 0.0, loss 0.9942
sharpe 0.1179, t-value 8.265 trade % 5.687, bi

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1215, t-value 10.39 trade % 8.471, big trade % 0.0, loss 0.9963
sharpe 0.1092, t-value 9.945 trade % 9.596, big trade % 0.0, loss 0.9973
sharpe 0.1798, t-value 12.96 trade % 6.01, big trade % 0.0, loss 0.993
sharpe 0.05414, t-value 4.563 trade % 8.221, big trade % 0.0, loss 0.9988
sharpe 0.1891, t-value 15.97 trade % 8.256, big trade % 0.0, loss 0.9952
sharpe 0.1827, t-value 15.01 trade % 7.806, big trade % 0.0, loss 0.9957
sharpe 0.1822, t-value 15.02 trade % 7.869, big trade % 0.0, loss 0.9909
sharpe 0.2, t-value 16.52 trade % 7.903, big trade % 0.0, loss 0.9941
sharpe 0.0901, t-value 7.308 trade % 7.615, big trade % 0.0, loss 0.995
sharpe 0.1185, t-value 10.4 trade % 8.908, big trade % 0.0, loss 0.9951
sharpe 0.06844, t-value 6.522 trade % 10.51, big trade % 0.0, loss 0.9974
sharpe 0.2605, t-value 22.25 trade % 8.442, big trade % 0.0, loss 0.9925
sharpe 0.09033, t-value 7.449 trade % 7.871, big trade % 0.0, loss 0.9955
sharpe 0.1264, t-value 10.27 trade % 7.635, big trade %

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1163, t-value 6.932 trade % 4.113, big trade % 0.0, loss 0.998
sharpe 0.04441, t-value 2.988 trade % 5.242, big trade % 0.0, loss 0.9994
sharpe 0.1133, t-value 5.826 trade % 3.061, big trade % 0.0, loss 0.997
sharpe 0.09597, t-value 5.541 trade % 3.859, big trade % 0.0, loss 0.995
sharpe 0.1731, t-value 11.16 trade % 4.81, big trade % 0.0, loss 0.997
sharpe 0.09733, t-value 6.073 trade % 4.506, big trade % 0.0, loss 0.9985
sharpe 0.1738, t-value 9.699 trade % 3.606, big trade % 0.0, loss 0.9934
sharpe 0.1127, t-value 7.306 trade % 4.863, big trade % 0.0, loss 0.9977
sharpe 0.1366, t-value 7.74 trade % 3.716, big trade % 0.0, loss 0.989
sharpe 0.1244, t-value 7.771 trade % 4.515, big trade % 0.0, loss 0.9961
sharpe 0.102, t-value 6.243 trade % 4.336, big trade % 0.0, loss 0.9952
sharpe 0.2133, t-value 13.92 trade % 4.932, big trade % 0.0, loss 0.9957
sharpe 0.1109, t-value 6.598 trade % 4.094, big trade % 0.0, loss 0.995
sharpe 0.1457, t-value 8.645 trade % 4.072, big trade % 0

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1263, t-value 7.939 trade % 4.573, big trade % 0.0, loss 0.9973
sharpe 0.0582, t-value 3.754 trade % 4.815, big trade % 0.0, loss 0.9992
sharpe 0.1127, t-value 5.614 trade % 2.874, big trade % 0.0, loss 0.9967
sharpe 0.09415, t-value 6.642 trade % 5.761, big trade % 0.0, loss 0.9943
sharpe 0.1472, t-value 9.397 trade % 4.715, big trade % 0.0, loss 0.9974
sharpe 0.1378, t-value 8.466 trade % 4.371, big trade % 0.0, loss 0.9978
sharpe 0.1884, t-value 10.67 trade % 3.712, big trade % 0.0, loss 0.9907
sharpe 0.1175, t-value 7.793 trade % 5.087, big trade % 0.0, loss 0.9973
sharpe 0.134, t-value 7.464 trade % 3.593, big trade % 0.0, loss 0.9889
sharpe 0.1336, t-value 9.079 trade % 5.344, big trade % 0.0, loss 0.9945
sharpe 0.11, t-value 6.446 trade % 3.975, big trade % 0.0, loss 0.9941
sharpe 0.2158, t-value 14.31 trade % 5.092, big trade % 0.0, loss 0.9953
sharpe 0.1105, t-value 7.544 trade % 5.392, big trade % 0.0, loss 0.9936
sharpe 0.1357, t-value 9.332 trade % 5.473, big trade

Saving model checkpoint to ./results\checkpoint-2000
Configuration saved in ./results\checkpoint-2000\config.json
Model weights saved in ./results\checkpoint-2000\pytorch_model.bin


sharpe 0.09859, t-value 7.49 trade % 6.681, big trade % 0.0, loss 0.9949
sharpe 0.06097, t-value 4.24 trade % 5.597, big trade % 0.0, loss 0.998
sharpe 0.1263, t-value 9.796 trade % 6.959, big trade % 0.0, loss 0.9938
sharpe 0.04935, t-value 3.731 trade % 6.615, big trade % 0.0, loss 0.9988
sharpe 0.1003, t-value 7.096 trade % 5.797, big trade % 0.0, loss 0.9971
sharpe 0.1437, t-value 10.05 trade % 5.667, big trade % 0.0, loss 0.9889
sharpe 0.08701, t-value 6.93 trade % 7.342, big trade % 0.0, loss 0.9972
sharpe 0.08312, t-value 6.6 trade % 7.297, big trade % 0.0, loss 0.9964
sharpe 0.166, t-value 12.47 trade % 6.535, big trade % 0.0, loss 0.993
sharpe 0.1029, t-value 8.009 trade % 7.014, big trade % 0.0, loss 0.9954
sharpe 0.1724, t-value 12.94 trade % 6.525, big trade % 0.0, loss 0.9887
sharpe 0.05604, t-value 4.423 trade % 7.21, big trade % 0.0, loss 0.9986
sharpe 0.09646, t-value 7.629 trade % 7.239, big trade % 0.0, loss 0.9972
sharpe 0.1279, t-value 11.9 trade % 10.01, big trade 

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1336, t-value 8.787 trade % 5.007, big trade % 0.0, loss 0.997
sharpe 0.06409, t-value 5.257 trade % 7.789, big trade % 0.0, loss 0.9988
sharpe 0.1305, t-value 7.379 trade % 3.7, big trade % 0.0, loss 0.9952
sharpe 0.1096, t-value 7.137 trade % 4.907, big trade % 0.0, loss 0.9921
sharpe 0.1815, t-value 13.17 trade % 6.096, big trade % 0.0, loss 0.9957
sharpe 0.1343, t-value 10.56 trade % 7.151, big trade % 0.0, loss 0.9969
sharpe 0.2008, t-value 14.59 trade % 6.11, big trade % 0.0, loss 0.9883
sharpe 0.143, t-value 10.68 trade % 6.455, big trade % 0.0, loss 0.9959
sharpe 0.1126, t-value 8.424 trade % 6.473, big trade % 0.0, loss 0.991
sharpe 0.1399, t-value 10.42 trade % 6.42, big trade % 0.0, loss 0.9936
sharpe 0.09064, t-value 7.371 trade % 7.655, big trade % 0.0, loss 0.9951
sharpe 0.2038, t-value 15.8 trade % 6.956, big trade % 0.0, loss 0.9942
sharpe 0.1243, t-value 8.378 trade % 5.256, big trade % 0.0, loss 0.9918
sharpe 0.1591, t-value 10.21 trade % 4.767, big trade % 0

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1283, t-value 9.622 trade % 6.51, big trade % 0.0, loss 0.9964
sharpe 0.06848, t-value 4.742 trade % 5.55, big trade % 0.0, loss 0.9989
sharpe 0.1239, t-value 7.476 trade % 4.211, big trade % 0.0, loss 0.9953
sharpe 0.07377, t-value 6.529 trade % 9.067, big trade % 0.0, loss 0.9974
sharpe 0.1425, t-value 10.92 trade % 6.796, big trade % 0.0, loss 0.9967
sharpe 0.1275, t-value 7.932 trade % 4.476, big trade % 0.0, loss 0.9977
sharpe 0.1773, t-value 10.06 trade % 3.726, big trade % 0.0, loss 0.9904
sharpe 0.1458, t-value 11.41 trade % 7.083, big trade % 0.0, loss 0.9956
sharpe 0.1357, t-value 7.73 trade % 3.756, big trade % 0.0, loss 0.9916
sharpe 0.1331, t-value 10.19 trade % 6.785, big trade % 0.0, loss 0.9937
sharpe 0.1247, t-value 6.553 trade % 3.198, big trade % 0.0, loss 0.992
sharpe 0.2243, t-value 16.44 trade % 6.223, big trade % 0.0, loss 0.9938
sharpe 0.09842, t-value 8.051 trade % 7.745, big trade % 0.0, loss 0.9943
sharpe 0.1228, t-value 9.951 trade % 7.604, big trad

Saving model checkpoint to ./results\checkpoint-2500
Configuration saved in ./results\checkpoint-2500\config.json
Model weights saved in ./results\checkpoint-2500\pytorch_model.bin


sharpe 0.07834, t-value 6.445 trade % 7.833, big trade % 0.0, loss 0.9972
sharpe 0.1234, t-value 9.51 trade % 6.873, big trade % 0.0, loss 0.9943
sharpe 0.1598, t-value 12.98 trade % 7.643, big trade % 0.0, loss 0.9938
sharpe 0.1136, t-value 9.154 trade % 7.512, big trade % 0.0, loss 0.9946
sharpe 0.1735, t-value 13.12 trade % 6.615, big trade % 0.0, loss 0.9902
sharpe 0.06637, t-value 4.778 trade % 5.997, big trade % 0.0, loss 0.9974
sharpe 0.1937, t-value 17.33 trade % 9.262, big trade % 0.0, loss 0.9901
sharpe 0.1163, t-value 9.387 trade % 7.537, big trade % 0.0, loss 0.995
sharpe 0.1247, t-value 9.564 trade % 6.803, big trade % 0.0, loss 0.9966
sharpe 0.1245, t-value 12.03 trade % 10.8, big trade % 0.0, loss 0.9924
sharpe 0.07605, t-value 6.713 trade % 9.016, big trade % 0.0, loss 0.9979
sharpe 0.08778, t-value 6.919 trade % 7.19, big trade % 0.0, loss 0.9961
sharpe 0.09241, t-value 7.243 trade % 7.109, big trade % 0.0, loss 0.9945
sharpe 0.1318, t-value 11.3 trade % 8.505, big tra

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1584, t-value 11.4 trade % 5.995, big trade % 0.0, loss 0.9952
sharpe 0.052, t-value 3.841 trade % 6.315, big trade % 0.002315, loss 0.9991
sharpe 0.1244, t-value 6.521 trade % 3.178, big trade % 0.0, loss 0.9951
sharpe 0.07787, t-value 6.946 trade % 9.21, big trade % 0.0, loss 0.9969
sharpe 0.163, t-value 11.92 trade % 6.192, big trade % 0.0, loss 0.9962
sharpe 0.1114, t-value 7.549 trade % 5.311, big trade % 0.0, loss 0.9978
sharpe 0.2042, t-value 12.94 trade % 4.642, big trade % 0.0, loss 0.9881
sharpe 0.1392, t-value 10.52 trade % 6.614, big trade % 0.0, loss 0.9957
sharpe 0.09955, t-value 5.835 trade % 3.977, big trade % 0.0, loss 0.9943
sharpe 0.1434, t-value 11.4 trade % 7.31, big trade % 0.0, loss 0.9929
sharpe 0.1084, t-value 6.886 trade % 4.67, big trade % 0.0, loss 0.9933
sharpe 0.2162, t-value 15.58 trade % 6.009, big trade % 0.0, loss 0.9939
sharpe 0.1032, t-value 8.494 trade % 7.844, big trade % 0.0, loss 0.9936
sharpe 0.1414, t-value 11.25 trade % 7.326, big tra

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1562, t-value 10.93 trade % 5.664, big trade % 0.0, loss 0.9955
sharpe 0.05056, t-value 3.817 trade % 6.599, big trade % 0.0, loss 0.9991
sharpe 0.1225, t-value 6.412 trade % 3.171, big trade % 0.0, loss 0.9951
sharpe 0.08017, t-value 6.855 trade % 8.462, big trade % 0.0, loss 0.9964
sharpe 0.164, t-value 12.09 trade % 6.292, big trade % 0.0, loss 0.996
sharpe 0.1099, t-value 7.611 trade % 5.55, big trade % 0.0, loss 0.9978
sharpe 0.2032, t-value 13.02 trade % 4.754, big trade % 0.0, loss 0.9883
sharpe 0.1415, t-value 10.62 trade % 6.525, big trade % 0.0, loss 0.9956
sharpe 0.1021, t-value 6.108 trade % 4.141, big trade % 0.0, loss 0.9934
sharpe 0.1461, t-value 11.38 trade % 7.018, big trade % 0.0, loss 0.9929
sharpe 0.1078, t-value 7.102 trade % 5.022, big trade % 0.0, loss 0.9931
sharpe 0.2137, t-value 15.58 trade % 6.157, big trade % 0.0, loss 0.9938
sharpe 0.1077, t-value 8.563 trade % 7.321, big trade % 0.0, loss 0.9933
sharpe 0.1437, t-value 11.09 trade % 6.888, big trad

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.142, t-value 9.534 trade % 5.218, big trade % 0.0, loss 0.9964
sharpe 0.05013, t-value 3.475 trade % 5.561, big trade % 0.0, loss 0.9992
sharpe 0.1159, t-value 5.824 trade % 2.925, big trade % 0.0, loss 0.9957
sharpe 0.08138, t-value 6.576 trade % 7.558, big trade % 0.0, loss 0.996
sharpe 0.157, t-value 11.09 trade % 5.773, big trade % 0.0, loss 0.9965
sharpe 0.1101, t-value 6.965 trade % 4.636, big trade % 0.0, loss 0.998
sharpe 0.1955, t-value 11.26 trade % 3.839, big trade % 0.0, loss 0.9898
sharpe 0.1365, t-value 9.685 trade % 5.824, big trade % 0.0, loss 0.9962
sharpe 0.1143, t-value 6.158 trade % 3.361, big trade % 0.0, loss 0.9924
sharpe 0.1479, t-value 10.73 trade % 6.093, big trade % 0.0, loss 0.9934
sharpe 0.1139, t-value 6.713 trade % 4.019, big trade % 0.0, loss 0.9927
sharpe 0.2162, t-value 14.75 trade % 5.39, big trade % 0.0, loss 0.9942
sharpe 0.1097, t-value 8.202 trade % 6.467, big trade % 0.0, loss 0.9934
sharpe 0.1402, t-value 10.24 trade % 6.178, big trade 

Saving model checkpoint to ./results\checkpoint-3000
Configuration saved in ./results\checkpoint-3000\config.json
Model weights saved in ./results\checkpoint-3000\pytorch_model.bin


sharpe 0.1196, t-value 10.46 trade % 8.865, big trade % 0.0, loss 0.9948
sharpe 0.1547, t-value 12.32 trade % 7.344, big trade % 0.0, loss 0.996
sharpe 0.05948, t-value 4.497 trade % 6.616, big trade % 0.0, loss 0.9987
sharpe 0.07496, t-value 6.221 trade % 7.97, big trade % 0.0, loss 0.9971
sharpe 0.1804, t-value 17.09 trade % 10.39, big trade % 0.0, loss 0.9877
sharpe 0.02308, t-value 1.921 trade % 8.019, big trade % 0.0, loss 1.001
sharpe 0.1388, t-value 11.81 trade % 8.385, big trade % 0.0, loss 0.9934
sharpe 0.1457, t-value 14.33 trade % 11.2, big trade % 0.0, loss 0.9956
sharpe 0.1274, t-value 11.99 trade % 10.25, big trade % 0.0, loss 0.993
sharpe 0.04808, t-value 2.83 trade % 4.011, big trade % 0.0, loss 0.9987
sharpe 0.05727, t-value 4.798 trade % 8.126, big trade % 0.0, loss 0.9982
sharpe 0.1214, t-value 10.08 trade % 7.975, big trade % 0.0, loss 0.9947
sharpe 0.08571, t-value 7.563 trade % 9.011, big trade % 0.0, loss 0.9963
sharpe 0.05949, t-value 3.875 trade % 4.91, big tra



Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=3098, training_loss=3571307937434.048, metrics={'train_runtime': 1079.3329, 'train_samples_per_second': 2.87, 'train_steps_per_second': 2.87, 'total_flos': 0.0, 'train_loss': 3571307937434.048, 'epoch': 1.0})

In [9]:
# Drop the notebook's references to the trainer and model, then hand the
# cached CUDA memory back to the driver.
del trainer, model
torch.cuda.empty_cache()

# Appendix

## quick timing check

In [None]:
# Fresh GPU model for the timing run (the trained one was deleted above).
model = GPT2Trader(config).cuda()

In [72]:
%%timeit
fake_data = torch.randn(4, 391, 256)
fake_data = fake_data.cuda()
model(fake_data)
cpu = fake_data.cpu()

28.2 ms ± 2.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [73]:
# Same architecture on the CPU, for comparison with the GPU timing.
model = GPT2Trader(config).cpu()

Using 9 layers


In [74]:
%%timeit
fake_data = torch.randn(4, 391, 256)
model(fake_data)

748 ms ± 82.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
