In [1]:
import os

import torch
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import ray
from ray import train, tune
from ray.train import Checkpoint
from ray.tune.schedulers import PopulationBasedTraining
from attention_pytorch import focal_loss, LSTMWithAttention
import pandas as pd
import numpy as np

import math as math
import aiutils.ai_backtester as bt
import matplotlib.pyplot as plt

from sortino import compute_sortino

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

pd.options.display.width=None

np.set_printoptions(suppress=True)


# Importation des jeux de données et création des features

In [2]:
from preprocess import *



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

                        bars     days   weeks  months  years
Before Date Filtering  73863  4281.38  611.62  138.11  11.73
After Date Filtering   73863  4281.38  611.62  138.11  11.73
Percentage : 100.0%

Data Shapes (train,val,test,datetime)
(51693, 16, 12) (11077, 16, 12) (11078, 16, 12) (73848,)
2010-01-07T15:00:00.000000000 2021-09-27T09:00:00.000000000
After Time Filtering
(10652, 16, 12) (2270, 16, 12) (2270, 16, 12) (15192,)
2010-01-07T15:00:00.000000000 2021-09-24T16:00:00.000000000
val start : 2018-03-27T12:00:00.000000000
test start : 2019-12-26T12:00:00.000000000
                       bars    days  weeks  months  years
Before Date Filtering  8208  470.54  67.22   15.18   1.29
After Date Filtering   8208  470.54  67.22   15.18   1.29
Percentage : 100.0%

Data Shapes (train,val,test,datetime)
(8193, 16, 12) (0, 16, 12) (0, 16, 12) (8193,)
2021-09-28T01:00:00.000000000 2023-01-10T23:00:00.000000000
After Time Filtering
(1680, 16, 12) (0, 16, 12) (0, 16, 12) (1680,)
2021-09-28T1

In [3]:
def get_data_loaders(X_train, y_train, X_val, y_val, batch_size=64):
    """Wrap the training and validation arrays in PyTorch ``DataLoader`` objects.

    Each input is copied into a ``float32`` tensor via ``torch.tensor``; the
    feature/label pairs are grouped into ``TensorDataset`` instances.

    Args:
        X_train: Training features (anything ``torch.tensor`` accepts).
        y_train: Training labels, same leading length as ``X_train``.
        X_val: Validation features.
        y_val: Validation labels, same leading length as ``X_val``.
        batch_size: Number of samples per batch for both loaders.

    Returns:
        ``(train_loader, val_loader)``. The training loader shuffles its
        samples; the validation loader keeps the original order.
    """

    def _as_float_dataset(features, targets):
        # Convert a (features, targets) pair to float32 tensors and pair them up.
        return TensorDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(targets, dtype=torch.float32),
        )

    training_set = _as_float_dataset(X_train, y_train)
    validation_set = _as_float_dataset(X_val, y_val)

    return (
        DataLoader(training_set, batch_size=batch_size, shuffle=True),
        DataLoader(validation_set, batch_size=batch_size, shuffle=False),
    )

In [4]:
# Build the training/validation loaders with the default batch size (64).
# NOTE(review): X_train, y_train, X_val and y_val are not defined in any visible
# cell — presumably they come from `from preprocess import *`; confirm.
train_loader, val_loader = get_data_loaders(X_train, y_train, X_val, y_val)

# Création de la fonction d'entraînement

In [5]:
# Number of training and validation labels. train_attention uses them to slice
# y_datetime down to the validation window: [lytrain : lytrain + lyval].
lytrain = len(y_train)
lyval = len(y_val)

def train_attention(config):
    """Ray Tune trainable: train the attention LSTM and report loss / Sortino.

    Builds an ``LSTMWithAttention`` model, an SGD optimizer and a focal-loss
    criterion from ``config``, restores model/optimizer state from the trial's
    checkpoint when one is available, then loops forever: one pass over the
    training loader, one evaluation on the validation inputs, one
    ``train.report`` call. The loop has no exit condition of its own; stopping
    is left to the Tune run configuration.

    Args:
        config: Hyperparameter dict. Optional keys ``"lr"`` (default 0.01),
            ``"momentum"`` (default 0.9), ``"alpha"`` (default 0.25) and
            ``"gamma"`` (default 2.0); required key ``"checkpoint_interval"``
            (a checkpoint is attached every that many steps).

    Relies on notebook globals: ``train_loader``, ``X_val``, ``df`` and
    ``y_datetime`` (all fetched with ``ray.get``, so they must be Ray object
    references by the time a trial runs), plus ``features``, ``horizon``,
    ``pair``, ``lytrain`` and ``lyval``.
    """
    # Local import so this function is self-contained when shipped to workers.
    import tempfile

    # Read the optimizer hyperparameters once so the optimizer, the restored
    # param groups and the reported metrics all agree on the same values.
    lr = config.get("lr", 0.01)
    momentum = config.get("momentum", 0.9)

    ######## model creation ########

    step = 1
    # NOTE(review): the meaning of the positional sizes (6, 12, 32) is defined
    # by LSTMWithAttention in attention_pytorch — not visible from here.
    model = LSTMWithAttention(len(features),6,12,32, softmax = True)
    optimizer = optim.SGD(
        model.parameters(),
        lr=lr,
        momentum=momentum,
    )
    criterion = focal_loss(alpha=config.get("alpha", 0.25), gamma=config.get("gamma", 2.0))

    ######## restore from the checkpoint if there is one ########

    checkpoint = train.get_checkpoint()
    if checkpoint:
        with checkpoint.as_directory() as checkpoint_dir:
            checkpoint_dict = torch.load(os.path.join(checkpoint_dir, "checkpoint.pt"))

        model.load_state_dict(checkpoint_dict["model_state_dict"])
        optimizer.load_state_dict(checkpoint_dict["optimizer_state_dict"])
        # The restored optimizer state carries the hyperparameters it was saved
        # with; overwrite them with the values from this trial's config.
        for param_group in optimizer.param_groups:
            param_group["lr"] = lr
            param_group["momentum"] = momentum

        step = checkpoint_dict["step"] + 1

    # Loop-invariant inputs: fetch/convert them once instead of on every epoch.
    loader = ray.get(train_loader)
    val_inputs = torch.tensor(ray.get(X_val), dtype=torch.float32)

    ######## training loop ########

    while True:

        ### training ###
        model.train()
        for inputs, labels in loader:
            optimizer.zero_grad()
            inputs = inputs.float()
            outputs = model(inputs)
            # NOTE(review): criterion is called as (labels, outputs); confirm
            # this matches the argument order focal_loss expects.
            loss = criterion(labels, outputs)
            loss.backward()
            optimizer.step()

        ### evaluation ###
        model.eval()
        with torch.no_grad():
            val_outputs = model(val_inputs).detach().cpu().numpy()
        sortino = compute_sortino(ray.get(df),
                        val_outputs,
                        ray.get(y_datetime)[lytrain : lytrain+lyval],
                        predict_bars=horizon,
                        bh = False,
                        qty=10000,
                        spread = 0.0000,
                        starting_equity=10000,
                        symbol=pair,
                        side='both',
                        name='{} Model V1'.format(pair),
                        author='CY',
                        note=None,
                        showequitycurve=True,
                        showtrades=True,
                        showplstats=True,
                        showmaemfe=True)
        # "loss" is the loss of the last training batch of this epoch.
        metrics = {"loss": loss.item(), "lr": lr, "sortino": sortino}

        ### report to the scheduler and checkpoint when due ###
        if step % config["checkpoint_interval"] == 0:
            # Each report gets its own temporary directory: concurrent trials
            # must not write to a shared path, and the directory is guaranteed
            # to exist. train.report persists the checkpoint before returning,
            # so the temporary copy can be discarded afterwards.
            with tempfile.TemporaryDirectory() as tmpdir:
                torch.save({
                    "step": step,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                }, os.path.join(tmpdir, "checkpoint.pt"))
                train.report(metrics, checkpoint=Checkpoint.from_directory(tmpdir))
        else:
            train.report(metrics)

        step += 1

# Population Based Training

Une autre stratégie de recherche peut être utilisée en remplaçant le scheduler ci-dessous.

In [6]:
# Number of training iterations between two PBT perturbations. The same value
# is passed as "checkpoint_interval" in the tuner's param_space, so trials
# checkpoint exactly when the scheduler may need to clone them.
perturbation_interval = 5

scheduler = PopulationBasedTraining(
    time_attr="training_iteration",
    # Use the shared constant rather than a second hard-coded 5, so the
    # perturbation and checkpoint intervals cannot drift apart.
    perturbation_interval=perturbation_interval,
    metric="sortino",
    mode="max",
    # Distributions PBT resamples from when it perturbs a trial.
    hyperparam_mutations={
        "lr": tune.loguniform(0.0001, 1),
        "momentum": tune.choice([0.8, 0.9, 0.99]),
        "alpha": tune.uniform(0.2, 0.3),
        "gamma": tune.uniform(1.5, 2.5),
    },
)

In [7]:
######## Ray initialisation ########
# Start Ray only when no session is running. Shutting down and restarting on
# every run of this cell would discard the object store, leaving the
# ObjectRefs kept by the guards below pointing at objects that no longer exist.
if not ray.is_initialized():
    ray.init()

# ray.put stores each object once in Ray's object store and returns a reference,
# so the data is not copied again for every trial. The isinstance guards make
# the cell safe to re-run: a name that already holds a reference is left alone.
df = ray.put(df) if not isinstance(df, ray.ObjectRef) else df
train_loader = ray.put(train_loader) if not isinstance(train_loader, ray.ObjectRef) else train_loader
val_loader = ray.put(val_loader) if not isinstance(val_loader, ray.ObjectRef) else val_loader
y_datetime = ray.put(y_datetime) if not isinstance(y_datetime, ray.ObjectRef) else y_datetime
X_val = ray.put(X_val) if not isinstance(X_val, ray.ObjectRef) else X_val

######## tuner / scheduler initialisation ########
tuner = tune.Tuner(
    train_attention,
    run_config=train.RunConfig(
        name="pbt_test",
        # Stop a trial when it reaches the Sortino threshold or the maximum
        # number of training iterations, whichever comes first
        stop={"sortino": 3, "training_iteration": 100},
        checkpoint_config=train.CheckpointConfig(
            checkpoint_score_attribute="loss",
            # Lower loss is better; without this the default order ("max")
            # would retain the checkpoints with the highest loss.
            checkpoint_score_order="min",
            num_to_keep=16,
        ),
        storage_path="/ray_results",
    ),
    tune_config=tune.TuneConfig(
        scheduler=scheduler,
        reuse_actors=False,
        num_samples=16,
    ),
    # Initial sampling distributions for each trial's hyperparameters.
    param_space={
        "lr": tune.loguniform(0.0001, 1),
        "momentum": tune.choice([0.8, 0.9, 0.99]),
        "alpha": tune.uniform(0.1, 2),
        "gamma": tune.uniform(1, 5),
        # Checkpoint on the same cadence as the scheduler's perturbations.
        "checkpoint_interval": perturbation_interval,
    },
)

######## launch the tuner (very long-running) ########
results_grid = tuner.fit()

0,1
Current time:,2024-04-26 23:36:52
Running for:,00:00:51.69
Memory:,14.8/15.8 GiB

Trial name,status,loc,alpha,gamma,lr,momentum
train_attention_f9af2_00000,RUNNING,127.0.0.1:13336,1.90554,2.92877,0.0411489,0.8
train_attention_f9af2_00001,RUNNING,127.0.0.1:6440,0.125833,2.37696,0.308957,0.9
train_attention_f9af2_00002,RUNNING,127.0.0.1:14792,1.69931,1.82833,0.000505845,0.8
train_attention_f9af2_00003,RUNNING,127.0.0.1:21284,0.214616,1.08478,0.0932992,0.8
train_attention_f9af2_00004,RUNNING,127.0.0.1:5244,1.78214,4.81058,0.416897,0.8
train_attention_f9af2_00005,RUNNING,127.0.0.1:7432,1.35127,2.9063,0.530364,0.9
train_attention_f9af2_00006,RUNNING,127.0.0.1:7396,1.18386,4.31836,0.184205,0.8
train_attention_f9af2_00007,RUNNING,127.0.0.1:18108,1.34878,4.06273,0.281661,0.8
train_attention_f9af2_00008,RUNNING,127.0.0.1:9164,0.139373,3.40661,0.0164569,0.9
train_attention_f9af2_00009,RUNNING,127.0.0.1:12424,0.564795,1.53322,0.765027,0.99
