In [None]:
from config import crypto
from config import general as config
from finrl.meta.env_cryptocurrency_trading.env_multiple_crypto import CryptoEnv
from finrl.meta.env_custom.env_custom import CustomTradingEnv
from lib.drl import load_dataset, data_split
from lib.stocks_strategy import StocksStrategy
from lib.support import check_directory_structure, get_time, get_duration, log_duration
# from finrl.agents.elegantrl.elegantrl_models import DRLAgent as DRLAgent_erl
from stable_baselines3 import A2C
from stable_baselines3 import DDPG
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3 import TD3

import numpy as np
import pandas as pd
import time
import os

from finrl.agents.stablebaselines3.drl_agent import DRLAgent
from finrl.meta.data_processor import DataProcessor

# Base directory against which every results/model path in this notebook is built.
ROOT_DIR = '.'
# check_directory_structure(ROOT_DIR)

# Run-wide settings shared by all algorithm sections below.
FILE_PREFIX = "crypto_single"  # sub-directory name and filename prefix for this run's artifacts
file_start = time.time()  # start timestamp handed to log_duration in the last cell
file_total_timesteps = 500  # training budget used by every algorithm section
# Algorithm name -> Stable-Baselines3 class; used to reload saved models by name.
MODELS = {"A2C": A2C, "DDPG": DDPG, "TD3": TD3, "SAC": SAC, "PPO": PPO}

# os.makedirs(..., exist_ok=True) creates missing parent directories as well
# (os.mkdir raises FileNotFoundError when RESULTS_DIR itself is absent) and is
# a no-op when the directory already exists, so the cell is safe to re-run.
os.makedirs(f"{ROOT_DIR}/{config.RESULTS_DIR}/{FILE_PREFIX}", exist_ok=True)

In [None]:
def get_model(model_name):
    """Create an untrained model of the requested algorithm via the global ``agent``.

    Relies on the module-level ``agent`` (assigned in the training cells), so a
    training cell must have run before this function is called.

    Args:
        model_name: One of the keys of ``MODELS``.

    Returns:
        Whatever ``agent.get_model`` returns for that algorithm. PPO, TD3 and
        SAC are created with the hyper-parameters listed below; A2C and DDPG
        are created without extra ``model_kwargs``.

    Raises:
        NotImplementedError: If ``model_name`` is not a key of ``MODELS``.
    """
    if model_name not in MODELS:
        raise NotImplementedError("NotImplementedError")

    # None means "call agent.get_model without model_kwargs".
    hyperparams_by_name = {
        "A2C": None,
        "DDPG": None,
        "PPO": {"n_steps": 2048, "ent_coef": 0.01, "learning_rate": 0.00025, "batch_size": 128},
        "TD3": {"batch_size": 100, "buffer_size": 1000000, "learning_rate": 0.001},
        "SAC": {
            "batch_size": 128,
            "buffer_size": 1000000,
            "learning_rate": 0.0001,
            "learning_starts": 100,
            "ent_coef": "auto_0.1",
        },
    }

    # A name registered in MODELS but not handled here yields None.
    if model_name not in hyperparams_by_name:
        return None

    hyperparams = hyperparams_by_name[model_name]
    if hyperparams is None:
        return agent.get_model(model_name)
    return agent.get_model(model_name, model_kwargs=hyperparams)

def get_train_env(df):
    """Build the training environment for the given data.

    Args:
        df: Market data passed to ``CustomTradingEnv`` as its ``df`` argument.

    Returns:
        The first element of the tuple returned by
        ``CustomTradingEnv.get_sb_env()``.
    """
    env_settings = ENV_KWARGS.copy()
    # Use the ``df`` argument: the environment used to be built from the global
    # ``train_df``, which silently ignored whatever the caller passed in.
    train_gym = CustomTradingEnv(df=df, **env_settings)
    sb_env, _ = train_gym.get_sb_env()
    return sb_env
    
def get_test_env(df, turb_thres=None):
    """Build a ``CustomTradingEnv`` configured for evaluation.

    Args:
        df: Market data passed to the environment as ``df``.
        turb_thres: Forwarded as ``turbulence_threshold``; defaults to ``None``.

    Returns:
        The ``CustomTradingEnv`` instance, built from ``ENV_KWARGS`` with
        ``mode`` overridden to ``'test'``.
    """
    # Shallow copy of the shared settings with only the mode switched.
    test_settings = {**ENV_KWARGS, 'mode': 'test'}
    return CustomTradingEnv(df=df, turbulence_threshold=turb_thres, **test_settings)

def get_filenames(model_name):
    """Return the output path stems for one algorithm.

    Reads the module-level ``root_dir``, ``results_dir``, ``trained_model_dir``
    and ``FILE_PREFIX``.

    Args:
        model_name: Algorithm name embedded in both paths.

    Returns:
        A ``(results_prefix, model_path)`` tuple: the prefix for result CSV
        files and the model path (without the ``.zip`` extension).
    """
    # Both paths share the "<prefix>/<prefix>_<model>" tail.
    stem = f"{FILE_PREFIX}/{FILE_PREFIX}_{model_name}"
    results_prefix = f"{root_dir}/{results_dir}/{stem}"
    model_path = f"{root_dir}/{trained_model_dir}/{stem}_MODEL"
    return results_prefix, model_path

def load_model_from_file(model_name, model_filename):
    """Load a previously saved model from ``<model_filename>.zip``.

    Args:
        model_name: Key into ``MODELS`` selecting the class whose ``load`` is used.
        model_filename: Path of the saved model without the ``.zip`` extension.

    Returns:
        The object returned by the model class's ``load``.

    Raises:
        ValueError: If ``<model_filename>.zip`` is not an existing file.
    """
    archive_path = f"{model_filename}.zip"
    # Fail before touching the model class when there is nothing to load.
    if not os.path.isfile(archive_path):
        raise ValueError("NoModelFileAvailableError")

    restored = MODELS[model_name].load(archive_path)
    print(f"loaded model from {model_filename}")
    return restored

In [None]:
# Read the parsed crypto dataset; the first CSV column becomes the index.
dataset_path = f"{config.DATA_SAVE_DIR}/thesis/crypto_1h_parsed.csv"
df = pd.read_csv(dataset_path, index_col=0)

# Cut the train and test frames with data_split, using the date ranges from the crypto config.
train_df = data_split(df, crypto.TRAIN_START_DATE, crypto.TRAIN_END_DATE)
test_df = data_split(df, crypto.TEST_START_DATE, crypto.TEST_END_DATE)

# Show the shape and date range of each split as a quick sanity check.
print(f"train {train_df.shape} start: {crypto.TRAIN_START_DATE} end: {crypto.TRAIN_END_DATE}")
print(f"test  {test_df.shape} start: {crypto.TEST_START_DATE} end: {crypto.TEST_END_DATE}")

In [None]:
# Number of distinct tickers in the training data.
stock_dimension = len(train_df.tic.unique())
# State size: 1 + 2 entries per ticker + one entry per indicator per ticker.
# NOTE(review): presumably cash balance + (price, holdings) per ticker + indicators — confirm against CustomTradingEnv.
state_space = 1 + 2 * stock_dimension + len(crypto.INDICATORS) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

# Cost per stock; the same value is used for all, but it could vary per ticker.
# Two separate lists are built on purpose: the chained assignment
# `buy_cost_list = sell_cost_list = [...]` bound both names to ONE list object,
# so changing one side's costs would silently change the other side's too.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension
# Holdings at the start of training; all zeros means an empty portfolio.
num_stock_shares = [0] * stock_dimension

# Keyword arguments shared by every CustomTradingEnv built in this notebook;
# get_train_env / get_test_env copy this dict before use.
ENV_KWARGS = {
    "hmax": 10000,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": crypto.INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4,
    "make_plots": True,
    "mode": "train"
}

# Lower-case aliases read by get_filenames.
root_dir = ROOT_DIR
results_dir = config.RESULTS_DIR
trained_model_dir = config.TRAINED_MODEL_DIR

run_name = "crypto_single"

# A2C

In [None]:
# Select the algorithm for this section and derive its output paths.
model_name = "A2C"
results_file_prefix, model_filename = get_filenames(model_name)

In [None]:
# Build the training environment and an agent bound to it. get_model reads the
# module-level `agent`, so the agent must exist before the model is created.
env_train = get_train_env(train_df)
agent = DRLAgent(env=env_train)
total_timesteps = file_total_timesteps
model = get_model(model_name)

# Train, pass the start timestamp to log_duration, then save the model for the evaluation cell.
start = time.time()
trained_model = agent.train_model(
    model=model,
    tb_log_name=model_name,
    total_timesteps=total_timesteps,
)
log_duration(start)
trained_model.save(model_filename)

In [None]:
# Run the saved model (reloaded from disk) on the test split.
env_test = get_test_env(test_df)
loaded_model = load_model_from_file(model_name, model_filename)

start = time.time()
df_account_value, df_actions = DRLAgent.DRL_prediction(
    environment=env_test,
    model=loaded_model,
)
log_duration(start)

# Write the two prediction outputs next to the other results of this run.
df_account_value.to_csv(f"{results_file_prefix}_portfolio_value.csv")
df_actions.to_csv(f"{results_file_prefix}_portfolio_actions.csv")

# DDPG

In [None]:
# Select the algorithm for this section and derive its output paths.
model_name = "DDPG"
results_file_prefix, model_filename = get_filenames(model_name)

In [None]:
# Build the training environment and an agent bound to it. get_model reads the
# module-level `agent`, so the agent must exist before the model is created.
env_train = get_train_env(train_df)
agent = DRLAgent(env=env_train)
total_timesteps = file_total_timesteps
model = get_model(model_name)

# Train, pass the start timestamp to log_duration, then save the model for the evaluation cell.
start = time.time()
trained_model = agent.train_model(
    model=model,
    tb_log_name=model_name,
    total_timesteps=total_timesteps,
)
log_duration(start)
trained_model.save(model_filename)

In [None]:
# Run the saved model (reloaded from disk) on the test split.
env_test = get_test_env(test_df)
loaded_model = load_model_from_file(model_name, model_filename)

start = time.time()
df_account_value, df_actions = DRLAgent.DRL_prediction(
    environment=env_test,
    model=loaded_model,
)
log_duration(start)

# Write the two prediction outputs next to the other results of this run.
df_account_value.to_csv(f"{results_file_prefix}_portfolio_value.csv")
df_actions.to_csv(f"{results_file_prefix}_portfolio_actions.csv")

# PPO

In [None]:
# Select the algorithm for this section and derive its output paths.
model_name = "PPO"
results_file_prefix, model_filename = get_filenames(model_name)

In [None]:
# Build the training environment and an agent bound to it. get_model reads the
# module-level `agent`, so the agent must exist before the model is created.
env_train = get_train_env(train_df)
agent = DRLAgent(env=env_train)
total_timesteps = file_total_timesteps
model = get_model(model_name)

# Train, pass the start timestamp to log_duration, then save the model for the evaluation cell.
start = time.time()
trained_model = agent.train_model(
    model=model,
    tb_log_name=model_name,
    total_timesteps=total_timesteps,
)
log_duration(start)
trained_model.save(model_filename)

In [None]:
# Run the saved model (reloaded from disk) on the test split.
env_test = get_test_env(test_df)
loaded_model = load_model_from_file(model_name, model_filename)

start = time.time()
df_account_value, df_actions = DRLAgent.DRL_prediction(
    environment=env_test,
    model=loaded_model,
)
log_duration(start)

# Write the two prediction outputs next to the other results of this run.
df_account_value.to_csv(f"{results_file_prefix}_portfolio_value.csv")
df_actions.to_csv(f"{results_file_prefix}_portfolio_actions.csv")

# TD3

In [None]:
# Select the algorithm for this section and derive its output paths.
model_name = "TD3"
results_file_prefix, model_filename = get_filenames(model_name)

In [None]:
# Build the training environment and an agent bound to it. get_model reads the
# module-level `agent`, so the agent must exist before the model is created.
env_train = get_train_env(train_df)
agent = DRLAgent(env=env_train)
total_timesteps = file_total_timesteps
model = get_model(model_name)

# Train, pass the start timestamp to log_duration, then save the model for the evaluation cell.
start = time.time()
trained_model = agent.train_model(
    model=model,
    tb_log_name=model_name,
    total_timesteps=total_timesteps,
)
log_duration(start)
trained_model.save(model_filename)

In [None]:
# Run the saved model (reloaded from disk) on the test split.
env_test = get_test_env(test_df)
loaded_model = load_model_from_file(model_name, model_filename)

start = time.time()
df_account_value, df_actions = DRLAgent.DRL_prediction(
    environment=env_test,
    model=loaded_model,
)
log_duration(start)

# Write the two prediction outputs next to the other results of this run.
df_account_value.to_csv(f"{results_file_prefix}_portfolio_value.csv")
df_actions.to_csv(f"{results_file_prefix}_portfolio_actions.csv")

# SAC

In [None]:
# Select the algorithm for this section and derive its output paths.
model_name = "SAC"
results_file_prefix, model_filename = get_filenames(model_name)

In [None]:
# Build the training environment and an agent bound to it. get_model reads the
# module-level `agent`, so the agent must exist before the model is created.
env_train = get_train_env(train_df)
agent = DRLAgent(env=env_train)
total_timesteps = file_total_timesteps
model = get_model(model_name)

# Train, pass the start timestamp to log_duration, then save the model for the evaluation cell.
start = time.time()
trained_model = agent.train_model(
    model=model,
    tb_log_name=model_name,
    total_timesteps=total_timesteps,
)
log_duration(start)
trained_model.save(model_filename)

In [None]:
# Run the saved model (reloaded from disk) on the test split.
env_test = get_test_env(test_df)
loaded_model = load_model_from_file(model_name, model_filename)

start = time.time()
df_account_value, df_actions = DRLAgent.DRL_prediction(
    environment=env_test,
    model=loaded_model,
)
log_duration(start)

# Write the two prediction outputs next to the other results of this run.
df_account_value.to_csv(f"{results_file_prefix}_portfolio_value.csv")
df_actions.to_csv(f"{results_file_prefix}_portfolio_actions.csv")

In [None]:
# End of the run: print a completion marker, then pass the notebook-wide start
# timestamp (set in the first cell) to log_duration.
print("done")
log_duration(file_start)