In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import GPT2Model, GPT2Config, PreTrainedModel
from datasets import load_dataset, Dataset, DatasetDict

from torch.utils.data import DataLoader
from transformers import Trainer, TrainingArguments

import math

In [2]:
class GPT2Trader(PreTrainedModel):
    """GPT-2 backbone that turns a sequence of 5-feature bars into soft trade positions.

    Each input step is linearly embedded, layer-normalised and passed through a
    ``GPT2Model`` via ``inputs_embeds``; a linear head followed by ``tanh`` then
    produces 60 positions in (-1, 1) per step.

    Note that the constructor mutates the ``config`` it is given: ``n_layer``
    and ``initializer_range`` are overwritten from ``config.n_embd``.
    """

    def __init__(self, config):
        """Build the embedding, backbone and trade head from ``config``.

        Args:
            config: a ``GPT2Config``; ``n_embd`` drives the depth and the
                initializer range, both of which are written back onto it.
        """
        super().__init__(config)

        # use levine 2020 layer numbers: depth is derived from the width
        n_layer = round((math.log(config.n_embd) - 5.039) / 5.55e-2)
        n_layer = max(1, n_layer)
        print(f'Using {n_layer} layers')
        # must be set before GPT2Model(config) is constructed below
        config.n_layer = n_layer

        config.initializer_range = 1 / math.sqrt(config.n_embd)

        # 5 input features per step (presumably open/high/low/close/volume, going by the `ohlcv` argument name)
        self.embed = nn.Linear(5, config.n_embd, bias = False)
        self.norm = nn.LayerNorm(config.n_embd)
        self.gpt = GPT2Model(config)
        # 60 trade outputs per step -- NOTE(review): meaning of the 60 is not visible here, confirm against the dataset
        self.trade = nn.Linear(config.n_embd, 60, bias = False)

        # not referenced in forward(); kept as a frozen parameter so the state_dict keys stay the same
        self.trade_sign = nn.Parameter(torch.tensor([1.0, -1.0]), requires_grad = False)

    def forward(self, ohlcv, future = None):
        """Compute soft trades and, when targets are given, the training loss.

        Args:
            ohlcv: float tensor whose last dimension is 5 (input of ``self.embed``).
            future: optional tensor that is flattened and multiplied elementwise
                with the flattened trades, so it is expected to hold one value
                per trade output. When omitted, no loss is computed.

        Returns:
            ``{"loss": loss}`` when ``future`` is given (unchanged behaviour),
            otherwise ``{"trades": soft_trade}`` with the un-flattened tanh
            positions, so the model can also be used for inference.
        """
        embed = self.norm(self.embed(ohlcv))
        hidden = self.gpt(inputs_embeds = embed).last_hidden_state

        # positions squashed into (-1, 1)
        soft_trade = torch.tanh(self.trade(hidden))

        if future is None:
            # inference path: no targets, so only the positions can be returned
            return {"trades": soft_trade}

        # sharpe information
        soft_trade = soft_trade.reshape(-1)
        future = future.reshape(-1)
        soft_profit = soft_trade * future

        # |soft_trade| < 1, so -soft_profit + |future| is never negative and only
        # approaches 0 for a full-size position in the direction of `future`
        loss_ppl = (-soft_profit + future.abs()).mean()

        # penalty for big trades (to stop trading from happening with no profit)
        trade_penalty = soft_trade.abs().mean()

        # .1 means that a 100% position must make at least .1 of a std
        # (assumes `future` is expressed in standard deviations -- TODO confirm)
        loss = loss_ppl + .1 * trade_penalty

        self._print_stats(soft_trade, soft_profit, loss_ppl)

        return {"loss": loss}

    def _print_stats(self, soft_trade, soft_profit, loss_ppl):
        """Print per-batch diagnostics (sharpe, t-value, trade sizes, loss) without tracking gradients."""
        with torch.no_grad():
            trades = soft_trade.abs().sum()
            print(f'sharpe {soft_profit.mean() / soft_profit.std():.4}, t-value {(soft_profit.mean() * trades**.5) / soft_profit.std():.4} '
                  f'trade % {trades * 100 / len(soft_profit):,.4}, big trade % {(soft_trade.abs() > .8).sum() * 100 / len(soft_profit):.4}, loss {loss_ppl.item():.4}')

In [3]:
class TraderTrainer(Trainer):
    """Trainer variant whose training DataLoader iterates the dataset in stored order."""

    def get_train_dataloader(self) -> DataLoader:
        """
        Returns the training :class:`~torch.utils.data.DataLoader`.

        Unlike the base implementation this override never builds a sampler:
        the loader is created with ``shuffle=False`` so examples are visited
        in the order they appear in :obj:`self.train_dataset`.

        Raises:
            ValueError: if no training dataset was given to the trainer.
        """
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")

        # NOTE(review): the dataset is passed as-is, so every column reaches the
        # collator -- confirm it only holds the columns that forward() accepts.
        return DataLoader(
            self.train_dataset,
            batch_size=self.args.train_batch_size,
            shuffle=False, # IMPORTANT TO STOP OVERFITTING
            collate_fn=self.data_collator,
            drop_last=self.args.dataloader_drop_last,
            num_workers=self.args.dataloader_num_workers,
        )

In [4]:
# Raw bars straight from disk (presumably daily EURUSD, going by the file name).
eurusd_raw = Dataset.from_parquet('data/EURUSD_day.pq')

# make splits: shuffle = False keeps row order, so the last 10% of rows is held
# out and then cut in half into validation (first half) and test (second half)
split = eurusd_raw.train_test_split(test_size = .1, shuffle = False)
valid_test = split['test'].train_test_split(test_size = .5, shuffle = False)
eurusd = DatasetDict({
    'train': split['train'],
    'validation': valid_test['train'],
    'test': valid_test['test']
})

Using custom data configuration default-524475b7137ed866
Found cached dataset parquet (C:/Users/micha/.cache/huggingface/datasets/parquet/default-524475b7137ed866/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


In [9]:
training_args = TrainingArguments(
    # output location; no external experiment tracker
    output_dir = "./results",
    report_to = "none",

    # log and evaluate on the same 200-step cadence
    logging_strategy = "steps",
    logging_steps = 200,
    evaluation_strategy = "steps",
    eval_steps = 200,

    # a single pass over the data, one example per step
    num_train_epochs = 1,
    per_device_train_batch_size = 1,
    per_device_eval_batch_size = 1,

    # optimisation: cosine schedule with 5% warmup, gradients clipped to norm 1
    learning_rate = 5e-4,
    lr_scheduler_type = "cosine",
    warmup_ratio = .05,
    max_grad_norm = 1,
)

PyTorch: setting up devices


In [10]:
config = GPT2Config(
    # width and heads; n_layer is overwritten from n_embd inside GPT2Trader.__init__
    n_embd = 320,
    n_head = 5,
    # the model is fed inputs_embeds, so no token vocabulary is needed
    vocab_size = 0,
    # longest sequence the position embeddings can cover
    n_positions = 2000,
    # dropout (set all three to .0 to disable it)
    resid_pdrop = .1,
    embd_pdrop = .1,
    attn_pdrop = .1,
    scale_attn_by_inverse_layer_idx = True,
    use_cache = False,
)

In [11]:
# Place the model on whatever device the training arguments resolved to
# (GPU when available, CPU otherwise) instead of hard-coding .cuda(), which
# raises on machines without CUDA.
model = GPT2Trader(config).to(training_args.device)
trainer = TraderTrainer(
    model = model,
    args = training_args,
    train_dataset = eurusd['train'],
    eval_dataset = eurusd['validation']
)

Using 13 layers


In [12]:
# Run training with the arguments configured above; the call is left as the
# last expression so the returned TrainOutput is displayed as the cell result.
trainer.train()

***** Running training *****
  Num examples = 3098
  Num Epochs = 1
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 1
  Gradient Accumulation steps = 1
  Total optimization steps = 3098
Could not estimate the number of tokens of the input, floating-point operations will not be computed


sharpe 0.02251, t-value 4.239 trade % 41.04, big trade % 7.137, loss 1.784


Step,Training Loss,Validation Loss
200,0.8583,0.325155
400,0.6841,0.312105
600,0.6859,0.326843
800,0.7179,0.304278
1000,0.4622,0.298933
1200,0.4486,0.298155
1400,0.3166,0.29462
1600,0.4563,0.297838
1800,0.5199,0.295943
2000,0.388,0.29044


sharpe 0.009464, t-value 1.774 trade % 40.69, big trade % 7.214, loss 1.429
sharpe 0.01071, t-value 2.047 trade % 42.27, big trade % 9.072, loss 1.472
sharpe 0.03608, t-value 6.811 trade % 41.25, big trade % 8.212, loss 1.316
sharpe 0.01752, t-value 3.188 trade % 38.3, big trade % 4.85, loss 1.485
sharpe 0.07246, t-value 13.34 trade % 39.24, big trade % 7.115, loss 1.134
sharpe 0.009567, t-value 1.784 trade % 40.26, big trade % 6.497, loss 1.247
sharpe 0.09239, t-value 17.87 trade % 43.28, big trade % 10.67, loss 1.029
sharpe 0.1743, t-value 33.94 trade % 43.85, big trade % 10.53, loss 1.11
sharpe 0.04428, t-value 9.19 trade % 49.85, big trade % 15.87, loss 1.211
sharpe 0.07284, t-value 15.03 trade % 49.29, big trade % 13.56, loss 1.125
sharpe 0.02509, t-value 5.57 trade % 57.03, big trade % 19.72, loss 1.302
sharpe 0.1209, t-value 26.59 trade % 56.05, big trade % 20.22, loss 1.233
sharpe 0.2662, t-value 57.11 trade % 53.27, big trade % 18.04, loss 0.9574
sharpe 0.0445, t-value 10.14 t

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1921, t-value 34.76 trade % 37.91, big trade % 18.34, loss 0.2702
sharpe 0.05152, t-value 12.12 trade % 64.1, big trade % 49.55, loss 0.233
sharpe 0.1269, t-value 23.86 trade % 40.92, big trade % 22.77, loss 0.3134
sharpe 0.07226, t-value 13.06 trade % 37.81, big trade % 17.27, loss 0.3258
sharpe 0.1164, t-value 23.88 trade % 48.74, big trade % 31.33, loss 0.2277
sharpe 0.09034, t-value 20.62 trade % 60.32, big trade % 46.06, loss 0.2166
sharpe 0.2049, t-value 45.54 trade % 57.19, big trade % 43.18, loss 0.2708
sharpe 0.1495, t-value 30.89 trade % 49.42, big trade % 32.16, loss 0.2482
sharpe 0.007198, t-value 1.646 trade % 60.52, big trade % 47.47, loss 0.3825
sharpe 0.1683, t-value 34.73 trade % 49.29, big trade % 32.26, loss 0.2487
sharpe 0.006675, t-value 1.666 trade % 72.11, big trade % 60.44, loss 0.3202
sharpe 0.1236, t-value 26.78 trade % 54.32, big trade % 39.07, loss 0.2255
sharpe 0.1173, t-value 21.54 trade % 39.01, big trade % 18.87, loss 0.2834
sharpe 0.1761, t-val

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.08468, t-value 15.78 trade % 40.21, big trade % 24.04, loss 0.2884
sharpe 0.02912, t-value 5.203 trade % 36.96, big trade % 24.23, loss 0.2424
sharpe 0.1251, t-value 22.41 trade % 37.12, big trade % 20.73, loss 0.3182
sharpe 0.06961, t-value 14.24 trade % 48.41, big trade % 33.84, loss 0.3095
sharpe 0.1035, t-value 18.54 trade % 37.17, big trade % 22.27, loss 0.2365
sharpe 0.1223, t-value 22.99 trade % 40.89, big trade % 28.2, loss 0.2194
sharpe 0.1476, t-value 25.95 trade % 35.8, big trade % 21.56, loss 0.2957
sharpe 0.04389, t-value 8.279 trade % 41.19, big trade % 25.87, loss 0.2697
sharpe 0.1524, t-value 28.2 trade % 39.65, big trade % 24.94, loss 0.2586
sharpe 0.08953, t-value 14.84 trade % 31.79, big trade % 19.55, loss 0.2688
sharpe 0.1197, t-value 20.3 trade % 33.32, big trade % 21.28, loss 0.2718
sharpe 0.1661, t-value 31.88 trade % 42.66, big trade % 28.71, loss 0.2274
sharpe 0.08958, t-value 16.87 trade % 41.04, big trade % 26.14, loss 0.2786
sharpe 0.1094, t-value 

Saving model checkpoint to ./results\checkpoint-500
Configuration saved in ./results\checkpoint-500\config.json
Model weights saved in ./results\checkpoint-500\pytorch_model.bin


sharpe 0.00762, t-value 2.099 trade % 87.81, big trade % 84.26, loss 0.8487
sharpe 0.02311, t-value 6.418 trade % 89.26, big trade % 88.34, loss 0.7572
sharpe 0.01937, t-value 5.461 trade % 92.02, big trade % 92.15, loss 0.7298
sharpe 0.3523, t-value 84.61 trade % 66.76, big trade % 55.09, loss 0.4616
sharpe 0.283, t-value 69.2 trade % 69.21, big trade % 60.5, loss 0.4884
sharpe 0.1156, t-value 25.63 trade % 56.91, big trade % 35.13, loss 0.7241
sharpe 0.08379, t-value 20.14 trade % 66.86, big trade % 51.03, loss 0.8362
sharpe 0.3215, t-value 77.19 trade % 66.71, big trade % 57.39, loss 0.572
sharpe 0.03951, t-value 11.06 trade % 90.73, big trade % 88.29, loss 0.6039
sharpe 0.1131, t-value 27.65 trade % 69.22, big trade % 51.41, loss 0.757
sharpe 0.08195, t-value 18.21 trade % 57.16, big trade % 40.34, loss 0.6889
sharpe 0.2008, t-value 49.75 trade % 71.05, big trade % 64.44, loss 0.6259
sharpe 0.1457, t-value 30.92 trade % 52.1, big trade % 42.09, loss 0.6479
sharpe 0.1507, t-value 38

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.06774, t-value 15.39 trade % 59.75, big trade % 24.89, loss 0.2865
sharpe 0.02901, t-value 6.565 trade % 59.25, big trade % 30.17, loss 0.2403
sharpe 0.1579, t-value 35.04 trade % 57.03, big trade % 21.02, loss 0.2981
sharpe 0.04262, t-value 9.793 trade % 61.12, big trade % 29.11, loss 0.3295
sharpe 0.1517, t-value 34.21 trade % 58.83, big trade % 26.32, loss 0.2214
sharpe 0.1384, t-value 31.47 trade % 59.84, big trade % 30.61, loss 0.2112
sharpe 0.0625, t-value 13.7 trade % 55.65, big trade % 23.15, loss 0.3234
sharpe 0.1055, t-value 24.0 trade % 59.88, big trade % 27.7, loss 0.2543
sharpe 0.1887, t-value 43.1 trade % 60.39, big trade % 31.5, loss 0.2241
sharpe 0.03195, t-value 6.732 trade % 51.37, big trade % 14.94, loss 0.2842
sharpe 0.144, t-value 31.73 trade % 56.21, big trade % 27.76, loss 0.2577
sharpe 0.1801, t-value 41.11 trade % 60.3, big trade % 29.94, loss 0.2161
sharpe 0.06188, t-value 13.97 trade % 59.0, big trade % 24.57, loss 0.2929
sharpe 0.06974, t-value 15.7

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.09942, t-value 14.75 trade % 25.49, big trade % 20.8, loss 0.2907
sharpe 0.05654, t-value 10.63 trade % 40.92, big trade % 37.6, loss 0.2382
sharpe 0.1387, t-value 20.99 trade % 26.5, big trade % 22.67, loss 0.3169
sharpe 0.08846, t-value 11.45 trade % 19.4, big trade % 13.91, loss 0.3232
sharpe 0.1796, t-value 29.67 trade % 31.56, big trade % 28.12, loss 0.226
sharpe 0.1546, t-value 28.45 trade % 39.2, big trade % 35.61, loss 0.2128
sharpe 0.1615, t-value 27.5 trade % 33.53, big trade % 29.28, loss 0.2917
sharpe 0.1323, t-value 21.05 trade % 29.3, big trade % 25.68, loss 0.2566
sharpe 0.1074, t-value 21.36 trade % 45.81, big trade % 42.15, loss 0.2947
sharpe 0.1186, t-value 14.19 trade % 16.57, big trade % 9.961, loss 0.2647
sharpe 0.07467, t-value 16.17 trade % 54.28, big trade % 51.24, loss 0.2884
sharpe 0.2031, t-value 35.21 trade % 34.79, big trade % 31.52, loss 0.2203
sharpe 0.1259, t-value 16.0 trade % 18.7, big trade % 13.31, loss 0.2658
sharpe 0.1273, t-value 16.97 tr

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1438, t-value 22.3 trade % 27.86, big trade % 25.43, loss 0.2802
sharpe 0.05478, t-value 8.568 trade % 28.31, big trade % 25.39, loss 0.2404
sharpe 0.1157, t-value 15.97 trade % 22.05, big trade % 19.76, loss 0.3236
sharpe 0.08981, t-value 15.96 trade % 36.54, big trade % 34.02, loss 0.2971
sharpe 0.1741, t-value 26.35 trade % 26.51, big trade % 24.3, loss 0.2296
sharpe 0.1465, t-value 23.26 trade % 29.2, big trade % 26.48, loss 0.2184
sharpe 0.188, t-value 26.22 trade % 22.51, big trade % 19.56, loss 0.2866
sharpe 0.1133, t-value 17.8 trade % 28.58, big trade % 26.32, loss 0.2591
sharpe 0.1267, t-value 19.25 trade % 26.71, big trade % 23.73, loss 0.2844
sharpe 0.1287, t-value 16.67 trade % 19.42, big trade % 16.23, loss 0.2595
sharpe 0.1002, t-value 14.52 trade % 24.31, big trade % 20.93, loss 0.2818
sharpe 0.1825, t-value 29.1 trade % 29.43, big trade % 27.08, loss 0.2278
sharpe 0.1099, t-value 17.51 trade % 29.39, big trade % 27.14, loss 0.2711
sharpe 0.1447, t-value 23.97 

Saving model checkpoint to ./results\checkpoint-1000
Configuration saved in ./results\checkpoint-1000\config.json
Model weights saved in ./results\checkpoint-1000\pytorch_model.bin


sharpe 0.127, t-value 23.72 trade % 40.38, big trade % 37.12, loss 0.3356
sharpe 0.09888, t-value 17.68 trade % 37.02, big trade % 33.47, loss 0.351
sharpe 0.02565, t-value 4.783 trade % 40.25, big trade % 36.79, loss 0.3781
sharpe 0.1268, t-value 22.86 trade % 37.62, big trade % 33.61, loss 0.4141
sharpe 0.07462, t-value 13.95 trade % 40.45, big trade % 36.33, loss 0.2839
sharpe 0.1473, t-value 25.85 trade % 35.67, big trade % 31.59, loss 0.2643
sharpe 0.09471, t-value 17.47 trade % 39.38, big trade % 35.38, loss 0.3556
sharpe 0.04963, t-value 9.82 trade % 45.32, big trade % 40.98, loss 0.4484
sharpe 0.1128, t-value 21.62 trade % 42.51, big trade % 37.18, loss 0.4514
sharpe 0.1834, t-value 33.16 trade % 37.82, big trade % 34.08, loss 0.3451
sharpe 0.1605, t-value 30.74 trade % 42.48, big trade % 38.44, loss 0.3468
sharpe 0.03953, t-value 7.191 trade % 38.29, big trade % 33.72, loss 0.6253
sharpe -0.03108, t-value -5.942 trade % 42.32, big trade % 38.18, loss 0.494
sharpe 0.06445, t-va

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1551, t-value 23.36 trade % 26.26, big trade % 21.65, loss 0.2777
sharpe 0.06455, t-value 9.513 trade % 25.13, big trade % 19.22, loss 0.2403
sharpe 0.1214, t-value 15.21 trade % 18.17, big trade % 13.46, loss 0.324
sharpe 0.08256, t-value 14.8 trade % 37.21, big trade % 32.76, loss 0.3025
sharpe 0.1439, t-value 20.92 trade % 24.46, big trade % 20.96, loss 0.2344
sharpe 0.1282, t-value 17.13 trade % 20.68, big trade % 14.95, loss 0.2233
sharpe 0.1709, t-value 21.01 trade % 17.48, big trade % 12.07, loss 0.2948
sharpe 0.108, t-value 16.01 trade % 25.43, big trade % 20.59, loss 0.2616
sharpe 0.1269, t-value 15.45 trade % 17.17, big trade % 11.61, loss 0.2886
sharpe 0.1357, t-value 18.52 trade % 21.56, big trade % 17.14, loss 0.2575
sharpe 0.1086, t-value 13.16 trade % 17.0, big trade % 11.82, loss 0.2798
sharpe 0.1987, t-value 29.9 trade % 26.21, big trade % 21.5, loss 0.2284
sharpe 0.1142, t-value 18.07 trade % 28.95, big trade % 24.78, loss 0.2693
sharpe 0.1504, t-value 24.06 

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1339, t-value 13.41 trade % 11.61, big trade % 4.149, loss 0.2901
sharpe 0.02919, t-value 2.89 trade % 11.34, big trade % 6.579, loss 0.2439
sharpe 0.1319, t-value 11.98 trade % 9.541, big trade % 5.527, loss 0.3292
sharpe 0.09199, t-value 11.26 trade % 17.34, big trade % 5.343, loss 0.3162
sharpe 0.1436, t-value 14.22 trade % 11.34, big trade % 5.152, loss 0.2404
sharpe 0.1114, t-value 10.96 trade % 11.22, big trade % 7.152, loss 0.2273
sharpe 0.147, t-value 12.99 trade % 9.035, big trade % 6.272, loss 0.3138
sharpe 0.09749, t-value 9.696 trade % 11.45, big trade % 5.892, loss 0.2683
sharpe 0.1348, t-value 11.25 trade % 8.069, big trade % 4.497, loss 0.297
sharpe 0.1268, t-value 12.75 trade % 11.7, big trade % 4.023, loss 0.2702
sharpe 0.113, t-value 10.94 trade % 10.86, big trade % 7.674, loss 0.2853
sharpe 0.1886, t-value 21.95 trade % 15.68, big trade % 10.22, loss 0.2375
sharpe 0.1137, t-value 12.7 trade % 14.43, big trade % 6.105, loss 0.2861
sharpe 0.1366, t-value 14.85

Saving model checkpoint to ./results\checkpoint-1500
Configuration saved in ./results\checkpoint-1500\config.json
Model weights saved in ./results\checkpoint-1500\pytorch_model.bin


sharpe 0.1356, t-value 21.61 trade % 29.39, big trade % 21.46, loss 0.3147
sharpe 0.1844, t-value 28.2 trade % 27.07, big trade % 18.73, loss 0.471
sharpe 0.1414, t-value 23.17 trade % 31.07, big trade % 22.74, loss 0.4033
sharpe 0.07862, t-value 12.1 trade % 27.4, big trade % 19.33, loss 0.3717
sharpe 0.1669, t-value 27.09 trade % 30.5, big trade % 21.14, loss 0.4706
sharpe 0.06968, t-value 11.26 trade % 30.21, big trade % 21.61, loss 0.4561
sharpe 0.143, t-value 25.76 trade % 37.55, big trade % 25.46, loss 0.5533
sharpe 0.1329, t-value 24.25 trade % 38.58, big trade % 30.36, loss 0.4289
sharpe 0.1063, t-value 16.26 trade % 27.11, big trade % 19.42, loss 0.4123
sharpe 0.1111, t-value 18.57 trade % 32.35, big trade % 26.53, loss 0.5627
sharpe 0.07149, t-value 10.31 trade % 24.1, big trade % 17.95, loss 0.7368
sharpe 0.1117, t-value 18.34 trade % 31.22, big trade % 21.6, loss 0.5
sharpe 0.1312, t-value 19.54 trade % 25.68, big trade % 17.25, loss 0.3605
sharpe 0.1365, t-value 22.48 trad

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1484, t-value 21.66 trade % 24.66, big trade % 20.25, loss 0.2795
sharpe 0.04707, t-value 6.478 trade % 21.92, big trade % 15.14, loss 0.2418
sharpe 0.1112, t-value 14.66 trade % 20.1, big trade % 14.68, loss 0.3246
sharpe 0.085, t-value 15.49 trade % 38.43, big trade % 35.45, loss 0.2973
sharpe 0.144, t-value 20.59 trade % 23.66, big trade % 18.11, loss 0.2351
sharpe 0.1098, t-value 15.33 trade % 22.55, big trade % 14.77, loss 0.225
sharpe 0.1746, t-value 22.02 trade % 18.41, big trade % 11.95, loss 0.2894
sharpe 0.07232, t-value 10.52 trade % 24.5, big trade % 18.57, loss 0.2672
sharpe 0.1229, t-value 15.07 trade % 17.4, big trade % 9.38, loss 0.286
sharpe 0.1431, t-value 19.7 trade % 21.93, big trade % 15.17, loss 0.2563
sharpe 0.1075, t-value 13.59 trade % 18.49, big trade % 11.61, loss 0.2776
sharpe 0.1956, t-value 30.51 trade % 28.15, big trade % 23.12, loss 0.2272
sharpe 0.1149, t-value 18.55 trade % 30.19, big trade % 26.12, loss 0.266
sharpe 0.151, t-value 24.99 trade

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1291, t-value 17.75 trade % 21.89, big trade % 15.09, loss 0.2856
sharpe 0.03998, t-value 5.582 trade % 22.57, big trade % 13.97, loss 0.2424
sharpe 0.1118, t-value 14.21 trade % 18.69, big trade % 12.7, loss 0.3249
sharpe 0.0955, t-value 15.91 trade % 32.12, big trade % 27.89, loss 0.2897
sharpe 0.1394, t-value 19.24 trade % 22.04, big trade % 14.87, loss 0.2366
sharpe 0.09141, t-value 12.26 trade % 20.83, big trade % 13.68, loss 0.2268
sharpe 0.1616, t-value 19.92 trade % 17.58, big trade % 11.19, loss 0.294
sharpe 0.08226, t-value 11.74 trade % 23.57, big trade % 16.48, loss 0.2664
sharpe 0.1262, t-value 14.68 trade % 15.66, big trade % 8.097, loss 0.2832
sharpe 0.142, t-value 19.68 trade % 22.24, big trade % 14.14, loss 0.2553
sharpe 0.1069, t-value 13.72 trade % 19.06, big trade % 12.65, loss 0.2789
sharpe 0.1833, t-value 28.31 trade % 27.61, big trade % 21.76, loss 0.23
sharpe 0.1205, t-value 18.33 trade % 26.77, big trade % 21.92, loss 0.263
sharpe 0.1508, t-value 22.74

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.09975, t-value 11.24 trade % 14.68, big trade % 12.27, loss 0.2911
sharpe 0.03142, t-value 3.778 trade % 16.74, big trade % 13.18, loss 0.2433
sharpe 0.1234, t-value 13.57 trade % 14.0, big trade % 12.01, loss 0.3233
sharpe 0.1015, t-value 13.75 trade % 21.26, big trade % 18.53, loss 0.2888
sharpe 0.1582, t-value 18.32 trade % 15.51, big trade % 12.12, loss 0.2366
sharpe 0.1177, t-value 14.81 trade % 18.33, big trade % 14.42, loss 0.2243
sharpe 0.1604, t-value 17.54 trade % 13.83, big trade % 11.38, loss 0.2959
sharpe 0.07833, t-value 9.681 trade % 17.68, big trade % 14.82, loss 0.2675
sharpe 0.1327, t-value 14.47 trade % 13.78, big trade % 10.18, loss 0.2777
sharpe 0.1295, t-value 14.27 trade % 14.05, big trade % 10.21, loss 0.2609
sharpe 0.1084, t-value 13.5 trade % 17.93, big trade % 14.49, loss 0.278
sharpe 0.1888, t-value 26.74 trade % 23.23, big trade % 20.75, loss 0.2302
sharpe 0.1204, t-value 15.4 trade % 18.93, big trade % 16.49, loss 0.265
sharpe 0.151, t-value 17.89

Saving model checkpoint to ./results\checkpoint-2000
Configuration saved in ./results\checkpoint-2000\config.json
Model weights saved in ./results\checkpoint-2000\pytorch_model.bin


sharpe 0.08693, t-value 13.04 trade % 26.04, big trade % 21.68, loss 0.4987
sharpe 0.06712, t-value 10.39 trade % 27.75, big trade % 22.69, loss 0.4798
sharpe 0.1154, t-value 17.03 trade % 25.23, big trade % 18.86, loss 0.4303
sharpe 0.09789, t-value 13.76 trade % 22.86, big trade % 16.57, loss 0.4276
sharpe 0.06326, t-value 6.77 trade % 13.25, big trade % 5.354, loss 0.3928
sharpe 0.1483, t-value 17.85 trade % 16.78, big trade % 10.12, loss 0.4101
sharpe 0.1012, t-value 14.13 trade % 22.58, big trade % 13.34, loss 0.3474
sharpe 0.05988, t-value 9.217 trade % 27.42, big trade % 19.41, loss 0.4015
sharpe 0.1377, t-value 20.31 trade % 25.2, big trade % 17.31, loss 0.4637
sharpe 0.07361, t-value 10.32 trade % 22.73, big trade % 14.1, loss 0.4196
sharpe 0.1696, t-value 24.25 trade % 23.66, big trade % 17.95, loss 0.4655
sharpe 0.08035, t-value 11.26 trade % 22.75, big trade % 15.8, loss 0.4283
sharpe 0.04064, t-value 4.899 trade % 16.82, big trade % 10.2, loss 0.3268
sharpe 0.146, t-value 

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1008, t-value 10.75 trade % 13.15, big trade % 11.05, loss 0.292
sharpe 0.0375, t-value 4.54 trade % 16.96, big trade % 14.19, loss 0.2429
sharpe 0.1298, t-value 13.94 trade % 13.33, big trade % 11.44, loss 0.3233
sharpe 0.1108, t-value 13.47 trade % 17.13, big trade % 14.54, loss 0.2862
sharpe 0.1695, t-value 19.7 trade % 15.64, big trade % 12.74, loss 0.2354
sharpe 0.1263, t-value 16.22 trade % 19.07, big trade % 15.8, loss 0.2233
sharpe 0.1547, t-value 17.0 trade % 13.98, big trade % 11.87, loss 0.2995
sharpe 0.07828, t-value 9.334 trade % 16.45, big trade % 14.04, loss 0.2679
sharpe 0.1305, t-value 15.29 trade % 15.89, big trade % 12.93, loss 0.2849
sharpe 0.1287, t-value 14.34 trade % 14.36, big trade % 8.736, loss 0.2628
sharpe 0.1041, t-value 13.59 trade % 19.74, big trade % 16.44, loss 0.2837
sharpe 0.1863, t-value 25.83 trade % 22.24, big trade % 20.5, loss 0.2304
sharpe 0.1265, t-value 14.84 trade % 15.94, big trade % 13.53, loss 0.265
sharpe 0.1544, t-value 16.56 tr

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.0948, t-value 10.0 trade % 12.89, big trade % 10.27, loss 0.2931
sharpe 0.0414, t-value 5.136 trade % 17.81, big trade % 15.99, loss 0.2423
sharpe 0.1379, t-value 14.89 trade % 13.5, big trade % 11.4, loss 0.321
sharpe 0.1161, t-value 12.48 trade % 13.38, big trade % 10.32, loss 0.2852
sharpe 0.1775, t-value 21.31 trade % 16.68, big trade % 13.31, loss 0.234
sharpe 0.1245, t-value 15.63 trade % 18.25, big trade % 15.73, loss 0.2232
sharpe 0.1459, t-value 15.93 trade % 13.81, big trade % 12.26, loss 0.3015
sharpe 0.08905, t-value 10.83 trade % 17.12, big trade % 14.42, loss 0.2665
sharpe 0.1307, t-value 15.89 trade % 17.12, big trade % 14.72, loss 0.2806
sharpe 0.1273, t-value 13.34 trade % 12.72, big trade % 7.897, loss 0.2631
sharpe 0.1041, t-value 14.4 trade % 22.15, big trade % 20.06, loss 0.2812
sharpe 0.183, t-value 25.59 trade % 22.65, big trade % 20.7, loss 0.2303
sharpe 0.1275, t-value 13.68 trade % 13.33, big trade % 10.55, loss 0.2647
sharpe 0.1527, t-value 15.43 tra

Saving model checkpoint to ./results\checkpoint-2500
Configuration saved in ./results\checkpoint-2500\config.json
Model weights saved in ./results\checkpoint-2500\pytorch_model.bin


sharpe 0.03473, t-value 5.204 trade % 25.98, big trade % 19.43, loss 0.3055
sharpe 0.08393, t-value 10.35 trade % 17.6, big trade % 12.77, loss 0.351
sharpe 0.1494, t-value 16.66 trade % 14.4, big trade % 10.3, loss 0.3538
sharpe 0.1065, t-value 14.69 trade % 22.01, big trade % 18.09, loss 0.2624
sharpe 0.177, t-value 19.57 trade % 14.16, big trade % 9.163, loss 0.2879
sharpe 0.1164, t-value 17.39 trade % 25.85, big trade % 21.17, loss 0.3575
sharpe 0.1639, t-value 21.26 trade % 19.47, big trade % 11.04, loss 0.3564
sharpe 0.1057, t-value 11.98 trade % 14.87, big trade % 9.806, loss 0.286
sharpe 0.1032, t-value 9.006 trade % 8.814, big trade % 3.3, loss 0.2563
sharpe 0.1498, t-value 23.68 trade % 28.94, big trade % 22.94, loss 0.5298
sharpe 0.09447, t-value 11.02 trade % 15.74, big trade % 4.241, loss 0.4163
sharpe 0.1096, t-value 15.76 trade % 23.94, big trade % 19.09, loss 0.2908
sharpe 0.1081, t-value 17.77 trade % 31.29, big trade % 27.8, loss 0.365
sharpe 0.1261, t-value 16.74 tra

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1032, t-value 11.11 trade % 13.41, big trade % 12.01, loss 0.2914
sharpe 0.02695, t-value 3.253 trade % 16.86, big trade % 15.05, loss 0.2436
sharpe 0.128, t-value 12.6 trade % 11.23, big trade % 9.656, loss 0.3237
sharpe 0.1096, t-value 13.34 trade % 17.14, big trade % 14.74, loss 0.2844
sharpe 0.176, t-value 20.99 trade % 16.47, big trade % 13.95, loss 0.2337
sharpe 0.1172, t-value 14.05 trade % 16.63, big trade % 14.12, loss 0.2245
sharpe 0.1504, t-value 15.68 trade % 12.58, big trade % 11.19, loss 0.3005
sharpe 0.0882, t-value 10.8 trade % 17.35, big trade % 15.1, loss 0.2662
sharpe 0.1357, t-value 14.01 trade % 12.35, big trade % 9.891, loss 0.2795
sharpe 0.1278, t-value 12.73 trade % 11.47, big trade % 6.994, loss 0.2632
sharpe 0.1051, t-value 13.2 trade % 18.23, big trade % 16.3, loss 0.2823
sharpe 0.1807, t-value 24.79 trade % 21.78, big trade % 20.49, loss 0.2311
sharpe 0.1248, t-value 14.63 trade % 15.92, big trade % 13.89, loss 0.2642
sharpe 0.1531, t-value 16.0 tra

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1015, t-value 10.31 trade % 11.96, big trade % 9.457, loss 0.2929
sharpe 0.03188, t-value 3.69 trade % 15.51, big trade % 13.18, loss 0.2434
sharpe 0.1281, t-value 11.72 trade % 9.684, big trade % 7.216, loss 0.3252
sharpe 0.114, t-value 12.5 trade % 13.91, big trade % 10.2, loss 0.2909
sharpe 0.1746, t-value 19.68 trade % 14.7, big trade % 11.78, loss 0.2351
sharpe 0.1119, t-value 12.82 trade % 15.21, big trade % 12.53, loss 0.2255
sharpe 0.1497, t-value 15.0 trade % 11.63, big trade % 10.31, loss 0.3047
sharpe 0.08635, t-value 10.07 trade % 15.72, big trade % 13.06, loss 0.2672
sharpe 0.1372, t-value 13.05 trade % 10.48, big trade % 7.458, loss 0.2845
sharpe 0.1281, t-value 10.94 trade % 8.436, big trade % 1.623, loss 0.2688
sharpe 0.1067, t-value 12.81 trade % 16.66, big trade % 14.23, loss 0.2839
sharpe 0.1811, t-value 23.89 trade % 20.13, big trade % 18.22, loss 0.2323
sharpe 0.1281, t-value 13.44 trade % 12.73, big trade % 8.604, loss 0.2709
sharpe 0.1545, t-value 14.8 t

***** Running Evaluation *****
  Num examples = 172
  Batch size = 1


sharpe 0.1025, t-value 10.62 trade % 12.42, big trade % 10.79, loss 0.2922
sharpe 0.02669, t-value 3.21 trade % 16.74, big trade % 14.62, loss 0.2436
sharpe 0.1269, t-value 12.11 trade % 10.55, big trade % 8.906, loss 0.3243
sharpe 0.1132, t-value 12.96 trade % 15.19, big trade % 12.53, loss 0.2872
sharpe 0.1765, t-value 20.63 trade % 15.81, big trade % 13.41, loss 0.2339
sharpe 0.1155, t-value 13.71 trade % 16.29, big trade % 13.59, loss 0.2248
sharpe 0.151, t-value 15.55 trade % 12.27, big trade % 10.75, loss 0.3025
sharpe 0.08986, t-value 10.73 trade % 16.49, big trade % 14.22, loss 0.2663
sharpe 0.1358, t-value 13.67 trade % 11.74, big trade % 9.287, loss 0.2805
sharpe 0.1296, t-value 11.94 trade % 9.82, big trade % 3.39, loss 0.2663
sharpe 0.1059, t-value 13.16 trade % 17.88, big trade % 15.84, loss 0.2822
sharpe 0.1806, t-value 24.29 trade % 20.94, big trade % 19.56, loss 0.2317
sharpe 0.128, t-value 13.97 trade % 13.79, big trade % 10.96, loss 0.2674
sharpe 0.155, t-value 15.44 

Saving model checkpoint to ./results\checkpoint-3000
Configuration saved in ./results\checkpoint-3000\config.json
Model weights saved in ./results\checkpoint-3000\pytorch_model.bin


sharpe 0.1626, t-value 23.59 trade % 24.35, big trade % 18.2, loss 0.2934
sharpe 0.1051, t-value 8.292 trade % 7.206, big trade % 2.023, loss 0.2683
sharpe 0.03934, t-value 3.266 trade % 7.977, big trade % 2.438, loss 0.2414
sharpe 0.1072, t-value 14.58 trade % 21.41, big trade % 11.08, loss 0.2678
sharpe 0.18, t-value 32.51 trade % 37.75, big trade % 33.32, loss 0.3023
sharpe 0.06012, t-value 10.82 trade % 37.52, big trade % 32.56, loss 0.3453
sharpe 0.1592, t-value 17.21 trade % 13.51, big trade % 3.993, loss 0.3194
sharpe 0.108, t-value 19.28 trade % 36.86, big trade % 22.52, loss 0.1963
sharpe 0.117, t-value 20.87 trade % 36.82, big trade % 32.19, loss 0.3093
sharpe 0.1083, t-value 17.61 trade % 30.62, big trade % 26.03, loss 0.3637
sharpe 0.09599, t-value 14.96 trade % 28.1, big trade % 17.04, loss 0.3704
sharpe 0.1528, t-value 24.67 trade % 30.16, big trade % 23.45, loss 0.3302
sharpe 0.1406, t-value 17.57 trade % 18.07, big trade % 5.186, loss 0.324
sharpe 0.0484, t-value 3.592 



Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=3098, training_loss=0.4807242423661529, metrics={'train_runtime': 1148.6036, 'train_samples_per_second': 2.697, 'train_steps_per_second': 2.697, 'total_flos': 0.0, 'train_loss': 0.4807242423661529, 'epoch': 1.0})

In [None]:
# Drop the notebook's references to the trainer and model, then ask PyTorch to
# release its cached CUDA memory.
del trainer, model
torch.cuda.empty_cache()

# Appendix

## quick timing check

In [None]:
# Fresh, randomly initialised model on the GPU for the timing cell below.
model = GPT2Trader(config).cuda()

In [72]:
%%timeit
fake_data = torch.randn(4, 391, 256)
fake_data = fake_data.cuda()
model(fake_data)
cpu = fake_data.cpu()

28.2 ms ± 2.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [73]:
# Same architecture, rebuilt on the CPU for the CPU timing cell below.
model = GPT2Trader(config).cpu()

Using 9 layers


In [74]:
%%timeit
fake_data = torch.randn(4, 391, 256)
model(fake_data)

748 ms ± 82.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
