In [6]:
import datetime
import time
from datetime import timedelta
from pathlib import Path
from pprint import pprint

import mpu
import nest_asyncio
import pandas as pd
import rapidjson
from freqtrade.configuration import Configuration

from lazyft.downloader import download
from lazyft.command_parameters import HyperoptParameters
from lazyft.strategy import load_strategy
from loguru import logger

from sklearn.preprocessing import robust_scale
from stable_baselines3.common.monitor import Monitor

import wandb
from trading_environments.my_freqtrade_env import Actions, SagesFreqtradeEnv

nest_asyncio.apply()

In [7]:
STRATEGY = "SagesGym5"
CONFIG = "configs/config.json"
PAIR = "BTC/USDT"
TRAINING_RANGE = "20180101-20201231"
VALIDATION_RANGE = "20210101-20211231"
TEST_RANGE = "20220101-"
TIMEFRAME = "1h"
freqtrade_config = Configuration.from_files([CONFIG])
freqtrade_config["timeframe"] = TIMEFRAME
freqtrade_config["pairs"] = [PAIR]
WINDOW_SIZE = 5
REQUIRED_STARTUP_CANDLES = WINDOW_SIZE
STARTING_CAPITAL = 1000
STAKE_AMOUNT = 100
# endregion

# todo load last saved score and use it as a starting point to prevent overwriting
MODEL_NAME = ""

LOAD_PREPROCESSED_DATA = False  # useful if you have to calculate a lot of features
SAVE_PREPROCESSED_DATA = True

LEARNING_TIME_STEPS = 4000000
LOG_DIR = "./logs/"
# TENSORBOARD_LOG = "./tensorboard/"
MODEL_DIR = Path("./models/")
_preprocessed_data_file = Path(
    "preprocessed",
    f"preprocessed_data__{PAIR.replace('/', '_')}__{TRAINING_RANGE}__{WINDOW_SIZE}.pickle",
)
"""End of settings"""
INDICATOR_FILTER = ["date", "open", "close", "high", "low", "volume"]

# hmm_model = Path(MODEL_DIR, f'btc_hmm.pickle')
model_dict_path = MODEL_DIR / "models.json"
if not model_dict_path.exists():
    model_dict = {}
else:
    model_dict = rapidjson.loads(model_dict_path.read_text())
    

print('Model directory:', MODEL_DIR)
print('Model dict:', model_dict)
print('Preprocessed data file:', _preprocessed_data_file)

config = {
    "strategy": STRATEGY,
    "config": CONFIG,
    "pair": PAIR,
    "training_range": TRAINING_RANGE,
    "timeframe": TIMEFRAME,
    "window_size": WINDOW_SIZE,
    "required_startup_candles": REQUIRED_STARTUP_CANDLES,
    "learning_time_steps": LEARNING_TIME_STEPS,
    "log_dir": LOG_DIR,
    "model_dir": MODEL_DIR,
    "model_dict": model_dict,
    "preprocessed_data_file": _preprocessed_data_file,
    "load_preprocessed_data": LOAD_PREPROCESSED_DATA,
    # "policy_kwargs": POLICY_KWARGS,
    "starting_capital": STARTING_CAPITAL,
    "stake_amount": STAKE_AMOUNT,
}
print('Config:')
pprint(config)

Model directory: models
Model dict: {}
Preprocessed data file: preprocessed/preprocessed_data__BTC_USDT__20180101-20201231__5.pickle
Config:
{'config': 'configs/config.json',
 'learning_time_steps': 4000000,
 'load_preprocessed_data': False,
 'log_dir': './logs/',
 'model_dict': {},
 'model_dir': PosixPath('models'),
 'pair': 'BTC/USDT',
 'preprocessed_data_file': PosixPath('preprocessed/preprocessed_data__BTC_USDT__20180101-20201231__5.pickle'),
 'required_startup_candles': 5,
 'stake_amount': 100,
 'starting_capital': 1000,
 'strategy': 'SagesGym5',
 'timeframe': '1h',
 'training_range': '20180101-20201231',
 'window_size': 5}


In [8]:
from freqtrade.data import history
from freqtrade.configuration import TimeRange
from lazyft.data_loader import load_pair_data

timerange = TimeRange.parse_timerange(TRAINING_RANGE)

ohlc_data = history.load_data(
    datadir=freqtrade_config["datadir"],
    pairs=[PAIR],
    timeframe=TIMEFRAME,
    timerange=timerange,
   startup_candles=REQUIRED_STARTUP_CANDLES + 1,
    fail_without_data=True,
    data_format=freqtrade_config.get("dataformat_ohlcv", "json"),
)
price_data: pd.DataFrame = load_pair_data(PAIR,TIMEFRAME, timerange=TRAINING_RANGE, startup_candles=REQUIRED_STARTUP_CANDLES)
print('Loaded data:\n', price_data)

Loaded data:
[32m2023-10-12 14:37:05.757[0m | [1mINFO    [0m | [36mlazyft.data_loader[0m:[36mload_pair_data[0m:[36m47[0m - [1mLoaded 11153 rows for BTC/USDT @ timeframe 1h, data starts at 2019-09-23 08:00:00+00:00[0m
                            date  open  high   low  close  volume
0     2019-09-23 08:00:00+00:00 9.93K 9.93K 9.93K  9.93K   0.001
1     2019-09-23 09:00:00+00:00 9.93K 9.93K 9.93K  9.93K   0.000
2     2019-09-23 10:00:00+00:00 9.93K 9.93K 9.93K  9.93K   0.000
3     2019-09-23 11:00:00+00:00 9.93K 9.93K 9.93K  9.93K   0.000
4     2019-09-23 12:00:00+00:00 9.93K 9.93K 9.93K  9.93K   0.000
...                         ...   ...   ...   ...    ...     ...
11148 2020-12-30 20:00:00+00:00 28.8K   29K 28.6K  28.8K  11.538
11149 2020-12-30 21:00:00+00:00 28.8K   29K 28.6K  28.9K  16.847
11150 2020-12-30 22:00:00+00:00 28.9K 28.9K 28.6K  28.7K   4.966
11151 2020-12-30 23:00:00+00:00 28.7K 28.9K 28.6K  28.9K  11.346
11152 2020-12-31 00:00:00+00:00 28.9K 29.3K 28.9K  29.1

  pairdata['date'] = to_datetime(pairdata['date'],


In [9]:
from user_data.strategies import custom_indicators

price_data = custom_indicators.add_smi_signal(price_data)
price_data = custom_indicators.add_ema_signal(price_data)
price_data = custom_indicators.add_sar_signal(price_data)
price_data = custom_indicators.add_ha_signal(price_data)
price_data = custom_indicators.add_adx_di_signal(price_data)
price_data = custom_indicators.add_rsi_signal(price_data)
price_data = custom_indicators.add_bollinger_signal(price_data)
# price_data = custom_indicators.add_donchian_trend_signal(price_data)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["bollinger_signal"].iloc[i] = 1


In [10]:
import numpy as np

selected_data = price_data.drop(columns=["open", "close", "high", "low", "volume"])
# set to float32
selected_data_dict = {row['date']: row[1:].values for _, row in selected_data.iterrows()}
for key in selected_data_dict:
    selected_data_dict[key] = np.array(selected_data_dict[key], dtype=np.float32)


print('Selected data:\n', selected_data_dict)
from trading_environments.my_freqtrade_env4 import Actions, SagesFreqtradeEnv4
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.utils import set_random_seed
env = SagesFreqtradeEnv4(
    data=selected_data_dict,
    prices=price_data[["date", "open", "close", "high", "low", "volume"]],
    window_size=WINDOW_SIZE,  # how many past candles should it use as features
    pair=PAIR,
    stake_amount=STAKE_AMOUNT,
    starting_balance=STARTING_CAPITAL,
    punish_holding_amount=-2,
)
check_env(env)
trading_env = Monitor(env, LOG_DIR)
set_random_seed(1111, using_cuda=True)
config['action_shape'] = len(Actions)
config['punish_holding_amount'] = env.punish_holding_amount


Selected data:
 {Timestamp('2019-09-23 08:00:00+0000', tz='UTC'): array([0., 0., 0., 0., 0., 0., 0.], dtype=float32), Timestamp('2019-09-23 09:00:00+0000', tz='UTC'): array([0., 0., 0., 0., 0., 0., 0.], dtype=float32), Timestamp('2019-09-23 10:00:00+0000', tz='UTC'): array([0., 0., 0., 0., 0., 0., 0.], dtype=float32), Timestamp('2019-09-23 11:00:00+0000', tz='UTC'): array([0., 0., 0., 0., 0., 0., 0.], dtype=float32), Timestamp('2019-09-23 12:00:00+0000', tz='UTC'): array([0., 0., 0., 0., 0., 0., 0.], dtype=float32), Timestamp('2019-09-23 13:00:00+0000', tz='UTC'): array([0., 0., 0., 0., 0., 0., 0.], dtype=float32), Timestamp('2019-09-23 14:00:00+0000', tz='UTC'): array([0., 0., 0., 0., 0., 0., 0.], dtype=float32), Timestamp('2019-09-23 15:00:00+0000', tz='UTC'): array([0., 0., 0., 0., 0., 0., 0.], dtype=float32), Timestamp('2019-09-23 16:00:00+0000', tz='UTC'): array([0., 0., 0., 0., 0., 0., 0.], dtype=float32), Timestamp('2019-09-23 17:00:00+0000', tz='UTC'): array([0., 0., 0., 0., 0.

In [11]:
from pathlib import Path
from tb_callbacks import SaveOnStepCallback
from stable_baselines3 import DQN
from sb3_contrib.ppo_recurrent import RecurrentPPO
from sb3_contrib.qrdqn import QRDQN


# POLICY_KWARGS = dict(net_arch=[128, 128])

if MODEL_NAME:
        # load existing model
        model = RecurrentPPO.load(
            MODEL_DIR / MODEL_NAME.strip(".zip"),
            # tensorboard_log=TENSORBOARD_LOG,
        )
        logger.success(f"Loaded model from {MODEL_DIR / MODEL_NAME}")
        model.set_env(trading_env)
else:
    # policy = RecurrentActorCriticPolicy
    model = RecurrentPPO(
        # See https://stable-baselines3.readthedocs.io/en/master/guide/algos.html for other algos with discrete action space
        # "MultiInputLstmPolicy",
        "MlpLstmPolicy",
        # "MlpPolicy",
        trading_env,
        verbose=0,
        device="cuda",
        # tensorboard_log=TENSORBOARD_LOG,
        n_steps=len(price_data) - WINDOW_SIZE,
        # gradient_steps=-1,
        gamma=0.9,
        learning_rate=0.0001,
        batch_size=64,
        # ent_coef=0.0001123894292050861,
        # gae_lambda=0.8789545362092943,
        # reuse=True
        # policy_kwargs=POLICY_KWARGS,
    )
start_date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
base_name = f"{STRATEGY}_{trading_env.env.__class__.__name__}_{model.__class__.__name__}_{start_date}"
tb_callback = SaveOnStepCallback(
    check_freq=len(price_data)*5,
    save_name=f"best_model_{base_name}",
    save_dir=str(MODEL_DIR),
    log_dir=LOG_DIR,
    verbose=1,
)
config['model_type'] = model.__class__.__name__
config['save_name'] = base_name
env.set_log_dir(Path('trade_logs', start_date))
print(base_name)

[32m2023-10-12 14:37:10.224[0m | [1mINFO    [0m | [36mtrading_environments.my_freqtrade_env4[0m:[36mset_log_dir[0m:[36m623[0m - [1mLog directory set to trade_logs/20231012_143710[0m
SagesGym5_SagesFreqtradeEnv4_RecurrentPPO_20231012_143710


In [12]:
run = wandb.init(project="freqgym", entity="rtnanje", config=config)

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/raphael/.netrc


In [13]:
from wandb.integration.sb3 import WandbCallback

logger.info("Learning started.")
callback = WandbCallback(
    model_save_path=f"models/best_model_{run.id}",
    verbose=2,
    model_save_freq=len(price_data) * 5,
)

# wandb.watch(model)
model.learn(total_timesteps=LEARNING_TIME_STEPS, callback=callback)


[32m2023-10-12 14:37:22.704[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mLearning started.[0m
{'Step/total_reward': -1000.0, 'Step/balance': 1000.0, 'Step/trades': 0.0, 'Step/total_profit_pct': 0.0, 'Step/custom_score': 0.0, 'Step/avg_profit_pct': 0.0, 'Step/winning_trades': 0.0, 'Step/losing_trades': 0.0, 'Step/win_ratio': 0.0, 'Step/average_duration (d)': 0.0}
{'Step/total_reward': -1020.0, 'Step/balance': 996.0, 'Step/trades': 4.0, 'Step/total_profit_pct': -3.98, 'Step/custom_score': -15.92, 'Step/avg_profit_pct': -1.0, 'Step/winning_trades': 0.0, 'Step/losing_trades': 4.0, 'Step/win_ratio': 0.0, 'Step/average_duration (d)': 0.13541666666666666}
{'Step/total_reward': -1000.0, 'Step/balance': 1000.0, 'Step/trades': 0.0, 'Step/total_profit_pct': 0.0, 'Step/custom_score': 0.0, 'Step/avg_profit_pct': 0.0, 'Step/winning_trades': 0.0, 'Step/losing_trades': 0.0, 'Step/win_ratio': 0.0, 'Step/average_duration (d)': 0.0}
{'Step/total_reward': -1003.0, 'Step/

KeyboardInterrupt: 