# Imports

In [None]:
%load_ext autoreload
%autoreload 2
import os
import platform

# Choose the directory that holds the trained generator runs based on the
# host machine. platform.uname() is used rather than os.uname() because the
# latter is not available on Windows.
_host = platform.uname()
if _host.system == 'Linux' and _host.node.startswith('Desktop'):
    base_path = '../cot-gan/trained/'
else:
    # macOS, the 'aidf*' servers and every other host fall back to the local
    # runs directory, so base_path is always defined (previously an unlisted
    # host left it unset and the notebook failed later with a NameError).
    base_path = './runs/'

import numpy as np
import pandas as pd
import warnings
# Suppress FutureWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)
import torch
# Select the 'high' float32 matmul precision mode (see the PyTorch docs for
# the exact speed/precision trade-off this implies).
torch.set_float32_matmul_precision('high')
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.callbacks import CheckpointCallback
# Project helpers (get_hparams, get_generator, get_rl_env, ... are used below
# without an explicit import, so presumably they come from this star import).
from base import *
# from train import *
# Force CPU execution: the torch device handed to the agent is 'cpu' and
# CUDA devices are hidden from this process.
device = torch.device('cpu')
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Experiment Settings

In [None]:
# ---------------------------------------------------------------------------
# Environment
# ---------------------------------------------------------------------------
max_long = 1.0
max_short = -1.
interest_rate = 0.
transaction_cost = 1e-3
n_training_steps = 1e7
n_eval_episodes = 100
# Whether to transform the historical sequence of prices into signature features.
signature_features = False
# Number of levels in the signature feature in the observation space.
rl_n_levels = 3
# Number of lags in the signature feature in the observation space.
rl_lead_lag = [1]

trading_calendar = 'NYSE'
# Start date includes the historical path used to condition the generator.
gen_start_date = '1995-02-01'
gen_end_date = '2022-10-19'
# If True, the generated sequences can start from any randomly sampled date
# between (1995-01-01 + max(p,o,q) trading days) and
# (2022-10-19 - (hist_len + n_periods) trading days).
random_dates = True
# Used only if random_dates is True; otherwise the length is derived from
# gen_start_date and gen_end_date.
sample_len = 1300
# NOTE(review): the "RL Params" cell further down assigns this name again
# (4 instead of 1 when random_dates is True).
if random_dates:
    gen_batch_size = 1
else:
    gen_batch_size = 160

# Seeding: a fixed seed unless random_seed is switched on.
random_seed = False
if random_seed:
    seed = np.random.randint(0, 100000)
else:
    seed = 504
torch.manual_seed(seed)
set_random_seed(seed)

# ---------------------------------------------------------------------------
# Generator
# ---------------------------------------------------------------------------
# Alternative run: 'Feb26_08-21-16_aidf-svr-gpu03_Realdt_LSTMd_truncated_rq_5/'
# (n_levels=5, order=5, noise dim 4, batch_size=38)
# Selected run: n_levels=10, order=1, noise dim 4, batch_size=38
path = 'SPX_Apr08-09-57SPX_e1r1s42/'
wgt_filename = 'generator/'

# Set continue_training to True to resume from the agent stored in agent_path.
continue_training = False
agent_path = './runs/Mar20_14-28-38_lus-imac.lan_PPO_truncated_rq_10/'

### Generator params

In [None]:
# Read the hyper-parameters stored in the generator run's 'log/' directory
# and override the sample length with the value chosen in the settings cell.
events_path = f'{base_path}{path}log/'
params = get_hparams(events_path)
params['data_params']['sample_len'] = sample_len
# Bare expression so the notebook displays the resulting parameters.
params

In [None]:
# Build the generator from the merged model and data parameters (data
# parameters win on key clashes) and the weights stored under the run directory.
generator = get_generator(
    {**params['model_params'], **params['data_params']},
    f'{base_path}{path}{wgt_filename}',
)

### RL Params

In [None]:
# NOTE(review): this cell repeats most of the "Experiment Settings" cell; the
# only differing value is gen_batch_size (4 here when random_dates is True).

# === ENV ===
max_long = 1.0
max_short = -1.
interest_rate = 0.
transaction_cost = 1e-3
n_training_steps = 1e7
n_eval_episodes = 100
# Whether to transform the historical sequence of prices into signature features.
signature_features = False
# Number of levels in the signature feature in the observation space.
rl_n_levels = 3
# Number of lags in the signature feature in the observation space.
rl_lead_lag = [1]

trading_calendar = 'NYSE'
# Start date includes the historical path used to condition the generator.
gen_start_date = '1995-02-01'
gen_end_date = '2022-10-19'
# If True, the generated sequences can start from any randomly sampled date
# between (1995-01-01 + max(p,o,q) trading days) and
# (2022-10-19 - (hist_len + n_periods) trading days).
random_dates = True
# Used only if random_dates is True; otherwise the length is derived from
# gen_start_date and gen_end_date.
sample_len = 1300
if random_dates:
    gen_batch_size = 4
else:
    gen_batch_size = 160

# Re-seed torch and stable-baselines3 with a fixed seed unless random_seed is on.
random_seed = False
if random_seed:
    seed = np.random.randint(0, 100000)
else:
    seed = 504
torch.manual_seed(seed)
set_random_seed(seed)

# === GENERATOR ===
# Alternative run: 'Feb26_08-21-16_aidf-svr-gpu03_Realdt_LSTMd_truncated_rq_5/'
# (n_levels=5, order=5, noise dim 4, batch_size=38)
# Selected run: n_levels=10, order=1, noise dim 4, batch_size=38
path = 'SPX_Apr08-09-57SPX_e1r1s42/'
wgt_filename = 'generator/'

In [None]:
# Sequence lengths as recorded in the generator's data parameters.
hist_len = params['data_params']['hist_len']
sample_len = params['data_params']['sample_len']

# Price history, restricted to 1995-01-01 onwards.
df = pd.read_csv('./data/spx_20231229.csv', index_col=0, parse_dates=True).loc['1995-01-01':]

if not random_dates:
    # With fixed dates the sample spans the whole [gen_start_date, gen_end_date] window.
    sample_len = len(df.loc[gen_start_date:gen_end_date])

# Number of periods in the historical window that is part of the state.
window_len = hist_len
# Number of periods where the agent can trade.
n_periods = sample_len - hist_len

# Agent type: one of DDPG / SAC / A2C / TD3 / PPO.
RL_algo = 'PPO'

# Policy network settings (alternative activation: torch.nn.ReLU;
# log_std_init is left at its default: 0 for PPO/A2C, -3 for SAC).
policy_kwargs = {
    'activation_fn': torch.nn.Tanh,
    'net_arch': [64, 64, 32],
}

# Keyword arguments handed to the agent constructor. Options left at their
# defaults: gamma (0.99 for PPO/SAC/DDPG/TD3), n_epochs (10 for PPO),
# max_grad_norm (0.5 for PPO).
arguments = dict(
    device=device,
    gae_lambda=0.9,             # default is 0.95 for PPO
    vf_coef=1.0,                # default is 0.5 for PPO
    n_steps=50 * n_periods,     # default is 2048 for PPO / 5 for A2C which uses batch_size = n_steps * n_envs
    batch_size=250,             # default is 64 for PPO, 256 for SAC / 100 for DDPG
    learning_rate=3e-5,         # default is 0.0003 for PPO/SAC / 0.001 for TD3
    clip_range=0.07,            # default is 0.2 for PPO
    policy_kwargs=policy_kwargs,
)

# Flat summary of the RL configuration: the algorithm name, every scalar
# argument, and the policy settings with the activation class replaced by
# its class name.
rl_params = {'algo': RL_algo}
rl_params.update(
    (key, value) for key, value in arguments.items()
    if key not in ('policy_kwargs', 'device')
)
rl_params.update(
    (key, value.__name__ if key == 'activation_fn' else value)
    for key, value in policy_kwargs.items()
)


# Settings collected into a single mapping; it is passed to start_writer and
# get_rl_env further down.
env_params = dict(
    max_long=max_long,
    max_short=max_short,
    interest_rate=interest_rate,
    transaction_cost=transaction_cost,
    n_training_steps=n_training_steps,
    n_eval_episodes=n_eval_episodes,
    signature_features=signature_features,
    trading_calendar=trading_calendar,
    gen_start_date=gen_start_date,
    gen_end_date=gen_end_date,
    random_dates=random_dates,
    sample_len=sample_len,
    window_len=window_len,
    hist_len=hist_len,
    n_periods=n_periods,
    gen_batch_size=gen_batch_size,
    random_seed=random_seed,
    seed=seed,
    path=path,
    wgt_filename=wgt_filename,
)
# The signature settings are only recorded when signature features are enabled.
if signature_features:
    env_params.update(rl_n_levels=rl_n_levels, rl_lead_lag=rl_lead_lag)

### Get all objects

In [None]:
# TensorBoard writer; all parameter groups are saved with it.
writer = start_writer(
    params['data_params'],
    params['model_params'],
    params['training_params'],
    rl_params,
    env_params,
)

# Path generator wrapping the trained generator and the price history.
# NOTE(review): the positional arguments (including the hard-coded dates)
# follow GARCH_path_generator's signature in the project module — confirm
# their meaning there.
path_generator = GARCH_path_generator(
    generator, df, '1995-01-01', '2022-10-19',
    20, 0, 0, 'Zero', 'GARCH', 'gaussian', 0, seed,
)

# Training environment; the large plot_episode_freq value is presumably
# meant to make episode plots rare during training.
env = get_rl_env(path_generator, writer, env_params, seed=seed)
env.plot_episode_freq = 1000000

# Agent of the selected algorithm, logging to the writer's directory.
agent = get_rl_agent(RL_algo, env, arguments, writer.log_dir, seed=seed)

# Training

In [None]:
# Periodically save the agent under '<log_dir>/checkpoints/' while training.
checkpoint_callback = CheckpointCallback(
    save_freq=1000000,
    save_path=writer.log_dir + '/checkpoints/',
    name_prefix='agent',
)
if continue_training:
    # Resume from the agent saved in agent_path instead of the freshly built one.
    # NOTE(review): PPO is not imported explicitly in this notebook
    # (presumably provided by `from base import *`), and PPO is loaded
    # regardless of the RL_algo setting.
    # log_path = f'{agent_path}rl_training_1/'
    agent = PPO.load(agent_path + 'agent.zip', tensorboard_log=writer.log_dir)
    agent.set_env(env)
# stable-baselines3 types total_timesteps as an int, while n_training_steps
# is defined as the float 10e6, so cast it explicitly.
agent.learn(int(n_training_steps), tb_log_name='rl_training', callback=checkpoint_callback)

In [None]:
# Store the trained agent next to its TensorBoard logs.
agent.save(f'{writer.log_dir}/agent.zip')

In [None]:
# Summary statistics reported by the training environment for the baseline
# and for the agent; both calls return eight values in the same order.
(
    baseline_average_final_wealth,
    baseline_final_wealth_std,
    baseline_final_wealth_mad,
    baseline_wealth_std,
    baseline_time_growth_rate,
    baseline_ensemble_growth_rate,
    b_num_bankrupt,
    b_num_episodes,
) = env.baseline_results()
(
    average_final_wealth,
    final_wealth_std,
    final_wealth_mad,
    wealth_std,
    time_growth_rate,
    ensemble_growth_rate,
    num_bankrupt,
    num_episodes,
) = env.agent_results()

# One line per statistic, baseline first and agent second.
print(
    f'Baseline / Agent Average Final Wealth: {baseline_average_final_wealth:.4f} / {average_final_wealth:.4f}',
    f'Baseline / Agent Final Wealth Std: {baseline_final_wealth_std:.4f} / {final_wealth_std:.4f}',
    f'Baseline / Agent Final Wealth MAD: {baseline_final_wealth_mad:.4f} / {final_wealth_mad:.4f}',
    f'Baseline / Agent Time Growth Rate: {baseline_time_growth_rate:.4f} / {time_growth_rate:.4f}',
    f'Baseline / Agent Ensemble Growth Rate: {baseline_ensemble_growth_rate:.4f} / {ensemble_growth_rate:.4f}',
    f'Agent Bankrupt Episodes: {num_bankrupt} / {num_episodes}',
    f'Baseline Bankrupt Episodes: {b_num_bankrupt} / {b_num_episodes}',
    sep='\n',
)

# Evaluation

### Using same generator

In [None]:
# When no agent exists in this session yet, fall back to the one saved in agent_path.
# NOTE(review): PPO is not imported explicitly in this notebook; presumably
# it is provided by `from base import *`.
if 'agent' not in globals():
    agent = PPO.load(f'{agent_path}agent.zip')

In [None]:
# Evaluation environment built on the same path generator as training.
eval_env = get_rl_env(
    path_generator,
    writer,
    env_params,
    eval=True,
)
# Run the agent deterministically for n_eval_episodes episodes; the returned
# value is displayed by the notebook.
evaluate_policy(
    agent,
    eval_env,
    n_eval_episodes=n_eval_episodes,
    deterministic=True,
)

In [None]:
# Summary statistics reported by the evaluation environment for the baseline
# and for the agent; both calls return eight values in the same order.
(
    baseline_average_final_wealth,
    baseline_final_wealth_std,
    baseline_final_wealth_mad,
    baseline_wealth_std,
    baseline_time_growth_rate,
    baseline_ensemble_growth_rate,
    b_num_bankrupt,
    b_num_episodes,
) = eval_env.baseline_results()
(
    average_final_wealth,
    final_wealth_std,
    final_wealth_mad,
    wealth_std,
    time_growth_rate,
    ensemble_growth_rate,
    num_bankrupt,
    num_episodes,
) = eval_env.agent_results()

# One line per statistic, baseline first and agent second.
print(
    f'Baseline / Agent Average Final Wealth: {baseline_average_final_wealth:.4f} / {average_final_wealth:.4f}',
    f'Baseline / Agent Final Wealth Std: {baseline_final_wealth_std:.4f} / {final_wealth_std:.4f}',
    f'Baseline / Agent Final Wealth MAD: {baseline_final_wealth_mad:.4f} / {final_wealth_mad:.4f}',
    f'Baseline / Agent Time Growth Rate: {baseline_time_growth_rate:.4f} / {time_growth_rate:.4f}',
    f'Baseline / Agent Ensemble Growth Rate: {baseline_ensemble_growth_rate:.4f} / {ensemble_growth_rate:.4f}',
    f'Agent Bankrupt Episodes: {num_bankrupt} / {num_episodes}',
    f'Baseline Bankrupt Episodes: {b_num_bankrupt} / {b_num_episodes}',
    sep='\n',
)

In [None]:
# Weight plot from the evaluation environment, published under the given title.
eval_env.weight_plot(
    step=1,
    title='Plots/Evaluation Weights',
)

In [None]:
# Weight MAE plot from the evaluation environment, published under the given title.
eval_env.weight_mae_plot(
    step=1,
    title='Plots/Evaluation Weights MAE',
)

In [None]:
# Call the evaluation environment's final-wealth plot helper (presumably a
# plot of final wealth over the evaluation episodes — confirm in the env class).
eval_env.final_wealth_plot()

### Using real data

In [None]:
# Reload the price history from 1995-01-01 onwards and write it to the CSV
# file consumed by the real-data environment.
# NOTE(review): `path` is rebound here; earlier cells use the same name for
# the generator run directory.
path = './data/spx_rl.csv'
df = pd.read_csv('./data/spx_20231229.csv', index_col=0, parse_dates=True).loc['1995-01-01':]
df.to_csv(path)

In [None]:
# get env
# Environment settings for the real price history: every period after the
# initial observation window belongs to the episode.
# NOTE(review): this rebinds env_params, replacing the mapping used for the
# generator-based environments above.
env_params = dict(
    n_periods=len(df) - env_params['window_len'],
    stride=100,
    window_len=window_len,
    interest_rate=interest_rate,
    transaction_cost=transaction_cost,
    signature_features=signature_features,
)
if signature_features:
    env_params.update(rl_n_levels=rl_n_levels, rl_lead_lag=rl_lead_lag)

# Environment backed by the exported CSV, with plot_episode_freq set to 1.
real_env = get_real_data_env(path, env_params, writer=writer)
real_env.plot_episode_freq = 1

In [None]:
# A single deterministic evaluation episode on the real-data environment.
evaluate_policy(
    agent,
    real_env,
    n_eval_episodes=1,
    deterministic=True,
)

In [None]:
# Summary statistics reported by the real-data environment for the baseline
# and for the agent; both calls return eight values in the same order.
(
    baseline_average_final_wealth,
    baseline_final_wealth_std,
    baseline_final_wealth_mad,
    baseline_wealth_std,
    baseline_time_growth_rate,
    baseline_ensemble_growth_rate,
    b_num_bankrupt,
    b_num_episodes,
) = real_env.baseline_results()
(
    average_final_wealth,
    final_wealth_std,
    final_wealth_mad,
    wealth_std,
    time_growth_rate,
    ensemble_growth_rate,
    num_bankrupt,
    num_episodes,
) = real_env.agent_results()

# One line per statistic, baseline first and agent second.
print(
    f'Baseline / Agent Average Final Wealth: {baseline_average_final_wealth:.4f} / {average_final_wealth:.4f}',
    f'Baseline / Agent Final Wealth Std: {baseline_final_wealth_std:.4f} / {final_wealth_std:.4f}',
    f'Baseline / Agent Final Wealth MAD: {baseline_final_wealth_mad:.4f} / {final_wealth_mad:.4f}',
    f'Baseline / Agent Time Growth Rate: {baseline_time_growth_rate:.4f} / {time_growth_rate:.4f}',
    f'Baseline / Agent Ensemble Growth Rate: {baseline_ensemble_growth_rate:.4f} / {ensemble_growth_rate:.4f}',
    f'Agent Bankrupt Episodes: {num_bankrupt} / {num_episodes}',
    f'Baseline Bankrupt Episodes: {b_num_bankrupt} / {b_num_episodes}',
    sep='\n',
)

In [None]:
# Agent wealth on the real data, with the baseline included and the range disabled.
real_env.agent_wealth_plot(
    baseline=True,
    plot_range=False,
    title='Plots/Agent wealth (Real data)',
)