In [1]:
import torch

from transformers import PreTrainedModel
from trader_models import SRUTrader, SRUConfig, SGConvConfig, SGConvTrader, MegaConfig, MegaTrader
import datasets
from datasets import load_dataset, Dataset, DatasetDict

from torch.utils.data import DataLoader
from transformers import Trainer, TrainingArguments

import numpy as np

import logging
logging.disable(logging.INFO)

In [2]:
def compute_metrics(preds):
    soft_profit, soft_trade = preds.predictions
    abs_trade = np.abs(soft_trade)
    abs_trade = abs_trade.astype('float64') # half precision will cause the sum to overflow on next line
    trades = abs_trade.sum()
    
    day_profits = soft_profit.sum(axis = (1, 2))
    
    metrics = {
        'day profit': day_profits.mean(),
        'day sharpe': day_profits.mean() / day_profits.std(),
        'trade %': trades * 100 / soft_profit.size,
        
        'full trade %': (abs_trade >= .7).mean() * 100,
        'full trade accuracy': (soft_profit[abs_trade >= .7] > 0).mean() * 100,
        'full trade g/l': soft_profit[(abs_trade >= .7) & (soft_profit > 0)].mean()
                          / -soft_profit[(abs_trade >= .7) & (soft_profit < 0)].mean(),
        
        'medium trade %': ((abs_trade < .7) & (abs_trade >= .4)).mean() * 100,
        'medium trade accuracy': (soft_profit[(abs_trade < .7) & (abs_trade >= .4)] > 0).mean() * 100,
        'medium trade g/l': soft_profit[(abs_trade < .7) & (abs_trade >= .4) & (soft_profit > 0)].mean()
                            / -soft_profit[(abs_trade < .7) & (abs_trade >= .4) & (soft_profit < 0)].mean(),       
        
        'small trade %': ((abs_trade < .4) & (abs_trade >= .2)).mean() * 100,
        'small trade accuracy': (soft_profit[(abs_trade < .4) & (abs_trade >= .2)] > 0).mean() * 100,
        'small trade g/l': soft_profit[(abs_trade < .4) & (abs_trade >= .2) & (soft_profit > 0)].mean()
                            / -soft_profit[(abs_trade < .4) & (abs_trade >= .2) & (soft_profit < 0)].mean(),        
    }
    
    # round the metrics
    metrics = {k: np.format_float_positional(v, precision = 4) for k, v in metrics.items()}
    
    return metrics

In [3]:
fx = Dataset.load_from_disk('data/fx_days')

# make splits
split = fx.train_test_split(.003, shuffle = False)
valid_test = split['test'].train_test_split(.3, shuffle = False)
fx = DatasetDict({
    'train': split['train'],
    'validation': valid_test['train'],
    'test': valid_test['test']
})

In [4]:
fx

DatasetDict({
    train: Dataset({
        features: ['ohlcv', 'labels', 'future'],
        num_rows: 35213
    })
    validation: Dataset({
        features: ['ohlcv', 'labels', 'future'],
        num_rows: 74
    })
    test: Dataset({
        features: ['ohlcv', 'labels', 'future'],
        num_rows: 32
    })
})

In [5]:
training_args = TrainingArguments(
    output_dir = "./results",
    logging_strategy = "steps",
    evaluation_strategy = "steps",
    logging_steps = 200,
    eval_steps = 200,
    save_steps = 10000,
    report_to = "none",
    learning_rate = 1e-3,
    weight_decay = .01,
    lr_scheduler_type = "cosine",
    warmup_ratio = .05,
    num_train_epochs = 1,
    per_device_train_batch_size = 8,
    per_device_eval_batch_size = 8,
    max_grad_norm = 1,
#     fp16 = True,
)

In [6]:
config = SGConvConfig(
    n_embd = 320, n_head = 1, hidden_dropout_prob = 0
)

model = SGConvTrader(config)
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = fx['train'],
    eval_dataset = fx['validation'],
    compute_metrics = compute_metrics
)

Using 13 layers


In [9]:
# oanda data

# SGCONV transformer architecture! lr of 1e-3, batch size 8 hidden size 320, 1 head
# NO dropout, weight decay
# NO diagonal attention allowed, NO rotary embed, norm or residual on conv embed, kernel size of 5

# ce loss with conditioned kelly betting
trainer.train()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Day profit,Day sharpe,Trade %,Full trade %,Full trade accuracy,Full trade g/l,Medium trade %,Medium trade accuracy,Medium trade g/l,Small trade %,Small trade accuracy,Small trade g/l
200,2.2105,2.17126,-0.01,-0.0719,5.3201,0.4756,1.2936,0.7324,0.5752,8.1581,1.1005,0.9064,30.9789,1.1299
400,2.1705,2.160216,0.0393,0.3422,6.9082,0.8293,0.0251,0.7456,0.1713,2.4954,0.8416,1.1363,49.9541,1.2737
600,2.1688,2.160419,0.0167,0.1641,7.8999,0.8098,0.0129,0.2321,0.3131,5.2614,0.7012,3.2679,51.7262,0.9372
800,2.1626,2.153858,-0.0048,-0.0378,6.3985,0.942,0.3099,1.1583,0.5256,14.5606,0.9703,1.419,55.1988,0.7953
1000,2.1556,2.152036,0.0029,0.022,6.4033,1.1714,4.157,0.511,0.5486,37.9966,0.7135,1.8075,53.5275,0.7664
1200,2.1618,2.14825,0.0249,0.3081,6.5952,1.4711,0.3473,0.1748,0.317,60.0987,1.0863,2.4341,61.0906,0.9494
1400,2.1531,2.147601,0.0361,0.2121,8.5314,1.4307,0.2988,1.0577,0.2738,62.7951,0.773,3.4826,59.5719,0.9755
1600,2.1587,2.147372,-0.0039,-0.0418,6.9273,1.2688,0.0,,0.2213,4.3827,0.2958,2.2188,53.5598,0.8021
1800,2.1615,2.148,0.0188,0.2717,6.3906,1.4182,1.9778,3.6387,0.5749,62.2778,1.3274,2.2574,60.0166,1.0657
2000,2.1537,2.143505,0.0272,0.2621,7.5424,1.4698,3.3698,0.5353,1.4021,62.7649,1.1608,3.9239,57.2066,0.927


  'full trade g/l': soft_profit[(abs_trade >= .7) & (soft_profit > 0)].mean()
  ret = ret.dtype.type(ret / rcount)
  / -soft_profit[(abs_trade >= .7) & (soft_profit < 0)].mean(),


TrainOutput(global_step=4402, training_loss=2.156417083003639, metrics={'train_runtime': 2489.5752, 'train_samples_per_second': 14.144, 'train_steps_per_second': 1.768, 'total_flos': 0.0, 'train_loss': 2.156417083003639, 'epoch': 1.0})

In [12]:
# trainer.evaluate(fx['test'])

In [10]:
trainer.save_model('srupp.model')

# quick prediction test to ensure model isn't cheating

In [11]:
model = SRUTrader.from_pretrained('srupp.model', config = config).cuda().eval()

In [None]:
import matplotlib.pyplot as plt

for day in range(4):
    # cut data short so no backwards flow of info
    test_day = torch.tensor(fx['validation']['ohlcv'][day][:]).unsqueeze(0).cuda()
    test_futures = torch.tensor(fx['validation']['future'][day][:]).unsqueeze(0).cuda()
    after = torch.tensor(fx['validation']['ohlcv'][day][2*60:3*60])
    with torch.no_grad():
        # no access to futures
        pred = model(test_day)[0][2*60]
    torch.cuda.empty_cache()
#     if (pred.abs() >= .9).any():
    if True:
        print(day)
        print((pred.cpu() * 100).round())
        
        plt.plot(after.select(dim = 1, index = -1))
        plt.show()

In [None]:
import matplotlib.pyplot as plt

for day in range(4):
    # cut data short so no backwards flow of info
    test_day = torch.tensor(fx['validation']['ohlcv'][day][:121]).unsqueeze(0).cuda()
    after = torch.tensor(fx['validation']['ohlcv'][day][120:240])
    with torch.no_grad():
        # no access to futures
        pred = model(test_day)[0][120]
        
    torch.cuda.empty_cache()
#     if (pred.abs() >= .9).any():
    if True:
        print(day)
#         plt.pcolormesh(pred.cpu().unsqueeze(0))
        plt.show()
        print((pred.cpu() * 100).round())
        
        plt.plot(after.select(dim = 1, index = -1))
        plt.show()

# TODO: graph these instead of showing raw data (maybe even on same plot or at least side by side)

In [None]:
soft_profit, soft_trade = trainer.predict(fx['validation']).predictions

In [None]:
(soft_profit.sum(axis = (1, 2)) < 0).mean()

In [None]:
soft_profit.sum(axis = (1, 2)).std()

In [None]:
import matplotlib.pyplot as plt

plt.hist(soft_profit.sum(axis = (1, 2)), bins = 20)
plt.show()

In [None]:
# full trade percent on 24 hours, ignoring last hour, it makes most trades in london and ny sessions (esp overlap)
((np.abs(soft_trade) > .2).mean(axis = (0, 2)).reshape(-1, 60).mean(axis = 1) * 100).round()

In [None]:
# full trade accuracy on 24 hours
np.nanmean(np.where(np.abs(soft_trade) > .2, soft_profit > 0, np.nan), axis = (0, 2)).reshape(-1, 60).mean(axis = 1)

In [None]:
# full trade profit on 24 hours
np.nanmean(np.where(np.abs(soft_trade) > .3, soft_profit, np.nan), axis = (0, 2)).reshape(-1, 60).mean(axis = 1)

In [None]:
# percent full trades on all timeframes
(np.abs(soft_trade) > .2).mean(axis = (0, 1))

In [None]:
# full trade accuracy on all timeframes
np.nanmean(np.where(np.abs(soft_trade) > .2, soft_profit > 0, np.nan), axis = (0, 1))

In [None]:
# full trade gain on all timeframes
np.nanmean(np.where((np.abs(soft_trade) > .3) & (soft_profit > 0), soft_profit, np.nan), axis = (0, 1))

In [None]:
# full trade loss on all timeframes
np.nanmean(np.where((np.abs(soft_trade) > .3) & (soft_profit < 0), soft_profit, np.nan), axis = (0, 1))

In [None]:
# del trainer
# del model
# torch.cuda.empty_cache()

# Appendix

## SGConv Experiments