# Importing Libraries

In [7]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tf_agents.environments.tf_py_environment import TFPyEnvironment

import config
from agents.tfagents.dqn import DQNAgent
from agents.tfagents.ppo import PPOAgent
from environments.environment import TradingEnvironment
from environments.wrappers.tf.tfenv import TFTradingEnvironment
from environments.rewards.smurf import SmurfRewardFunction
from environments.rewards.marketorder import MarketOrderRF
from environments.rewards.marketlimitorder import MarketLimitOrderRF
from metrics.trading.pnl import CumulativeLogReturn
from metrics.trading.risk import InvestmentRisk
from metrics.trading.sharpe import SharpeRatio
from metrics.trading.sortino import SortinoRatio
from metrics.trading.drawdown import MaximumDrawdown

# Reading Datasets

In [8]:
def read_dataset(
        dataset_filepath,
        timeframe_size,
        target_horizon_len,
        num_eval_samples,
        fees,
        reward_fn_instance
):
    """
    Load a dataset and split it into train/eval input windows with matching reward functions.

    The columns listed in ``config.regression_features`` are min-max scaled to
    [0, 1] with a scaler fitted on the leading (training) rows only, then cut
    into overlapping windows of ``timeframe_size`` consecutive rows. The last
    ``num_eval_samples`` windows form the evaluation set; every earlier window
    belongs to the training set.

    Args:
        dataset_filepath: Value substituted into ``config.dataset_save_filepath``
            to obtain the CSV path.
        timeframe_size: Number of consecutive rows in each input window.
        target_horizon_len: Number of rows that must remain after the start of
            the last window in addition to the window itself; also forwarded to
            the reward function.
        num_eval_samples: Number of trailing windows held out for evaluation.
        fees: Forwarded to the reward function as ``fees_percentage``.
        reward_fn_instance: Callable that builds the inner reward function from
            ``timeframe_size``, ``target_horizon_len``, ``highs``, ``lows``,
            ``closes`` and ``fees_percentage``.

    Returns:
        Tuple ``(x_train, train_reward_fn, x_eval, eval_reward_fn)``.

    Raises:
        ValueError: If ``num_eval_samples`` is negative, or the dataset has too
            few rows to leave at least one training window.
        AssertionError: If the number of windows differs from the first
            dimension reported by the corresponding reward function.
    """
    # Reading dataset
    crypto_dataset_df = pd.read_csv(config.dataset_save_filepath.format(dataset_filepath))
    samples = crypto_dataset_df[config.regression_features].to_numpy(dtype=np.float32)
    num_samples = samples.shape[0]

    if num_eval_samples < 0:
        raise ValueError(f'num_eval_samples must be non-negative, got {num_eval_samples}')

    # A window may start at any row that still leaves room for the timeframe
    # and the target horizon behind it.
    num_inputs = num_samples - timeframe_size - target_horizon_len + 1

    # The same index separates (a) the rows used to fit the scaler, (b) the
    # train/eval windows and (c) the start of the eval price series.
    num_train_inputs = num_inputs - num_eval_samples
    if num_train_inputs <= 0:
        # Without this guard a negative index would silently wrap around in the
        # slices below and fit the scaler on the wrong rows.
        raise ValueError(
            f'Dataset with {num_samples} rows is too short for timeframe_size={timeframe_size}, '
            f'target_horizon_len={target_horizon_len} and num_eval_samples={num_eval_samples}: '
            f'{num_train_inputs} training windows would remain'
        )

    # Scaling data: fit on the training rows only, then apply to the rest
    scaler = MinMaxScaler(feature_range=(0, 1.0))
    samples[: num_train_inputs] = scaler.fit_transform(samples[: num_train_inputs])
    samples[num_train_inputs:] = scaler.transform(samples[num_train_inputs:])

    # Constructing timeframes for train, test
    inputs = np.float32([samples[start: start + timeframe_size] for start in range(num_inputs)])

    # Splitting inputs to train-test data
    x_train = inputs[: num_train_inputs]
    x_eval = inputs[num_train_inputs:]

    # Computing reward functions for train, test data (unscaled prices)
    closes = crypto_dataset_df['close'].to_numpy(dtype=np.float32)
    highs = crypto_dataset_df['high'].to_numpy(dtype=np.float32)
    lows = crypto_dataset_df['low'].to_numpy(dtype=np.float32)

    def _make_reward_fn(rows):
        # Build the wrapped reward function over one slice of the price series.
        return SmurfRewardFunction(reward_function=reward_fn_instance(
            timeframe_size=timeframe_size,
            target_horizon_len=target_horizon_len,
            highs=highs[rows],
            lows=lows[rows],
            closes=closes[rows],
            fees_percentage=fees
        ))

    train_reward_fn = _make_reward_fn(slice(None, num_samples - num_eval_samples))
    eval_reward_fn = _make_reward_fn(slice(num_train_inputs, None))

    assert x_train.shape[0] == train_reward_fn.get_reward_fn_shape()[0], \
        f'AssertionError: DimensionMismatch: x_train: {x_train.shape}, train_reward_fn: {train_reward_fn.get_reward_fn_shape()}'
    assert x_eval.shape[0] == eval_reward_fn.get_reward_fn_shape()[0], \
        f'AssertionError: DimensionMismatch: x_eval: {x_eval.shape}, eval_reward_fn: {eval_reward_fn.get_reward_fn_shape()}'

    return x_train, train_reward_fn, x_eval, eval_reward_fn

# Building Agent

In [9]:
def build_agent(
        agent_instance,
        observation_spec,
        action_spec,
        time_step_spec,
        env_batch_size,
        checkpoint_filepath,
        fc_layers,
        conv_layers
):
    """
    Instantiate an agent by forwarding the given settings as keyword arguments.

    Args:
        agent_instance: Callable (e.g. an agent class) that accepts the keyword
            arguments assembled below.
        observation_spec: Passed to the agent as ``input_tensor_spec``.
        action_spec: Passed through unchanged.
        time_step_spec: Passed through unchanged.
        env_batch_size: Passed through unchanged.
        checkpoint_filepath: Passed through unchanged.
        fc_layers: Passed through unchanged.
        conv_layers: Passed through unchanged.

    Returns:
        Whatever ``agent_instance(...)`` returns.
    """
    agent_kwargs = {
        # The agent constructor names the observation spec `input_tensor_spec`.
        'input_tensor_spec': observation_spec,
        'action_spec': action_spec,
        'time_step_spec': time_step_spec,
        'env_batch_size': env_batch_size,
        'checkpoint_filepath': checkpoint_filepath,
        'fc_layers': fc_layers,
        'conv_layers': conv_layers,
    }
    return agent_instance(**agent_kwargs)

# Building Trainer

In [10]:
def train(
        dataset_filepath,
        timeframe_size,
        target_horizon_len,
        num_eval_samples,
        fees,
        reward_fn_instance,
        agent_instance,
        checkpoint_filepath,
        fc_layers,
        conv_layers,
        train_episode_steps,
        train_iterations,
        eval_episodes,
        steps_per_eval,
        steps_per_log,
        steps_per_checkpoint,
        save_best_only
):
    """
    Run one full experiment: load data, build environments and agent, train, and collect results.

    The dataset arguments are forwarded to ``read_dataset``; the agent
    arguments are forwarded to ``build_agent``. The ``steps_per_*`` arguments
    are handed to ``agent.train`` as the corresponding ``iterations_per_*``
    keyword arguments. The evaluation episode spans the whole evaluation set
    (``x_eval.shape[0] - 1`` steps).

    Returns:
        Tuple ``(eval_avg_returns, eval_metrics)`` where ``eval_avg_returns``
        is the value returned by ``agent.train`` and ``eval_metrics`` comes
        from ``get_metrics()`` on the evaluation environment.
    """
    def _make_env(states, reward_fn, episode_steps):
        # Each environment receives its own, freshly created metric objects.
        return TFTradingEnvironment(
            env=TradingEnvironment(env_config={
                'states': states,
                'reward_fn': reward_fn,
                'episode_steps': episode_steps,
                'metrics': [CumulativeLogReturn(), InvestmentRisk(), SharpeRatio(), SortinoRatio(), MaximumDrawdown()]
            })
        )

    x_train, train_reward_fn, x_eval, eval_reward_fn = read_dataset(
        dataset_filepath=dataset_filepath,
        timeframe_size=timeframe_size,
        target_horizon_len=target_horizon_len,
        num_eval_samples=num_eval_samples,
        fees=fees,
        reward_fn_instance=reward_fn_instance
    )

    train_env = _make_env(x_train, train_reward_fn, train_episode_steps)
    eval_env = _make_env(x_eval, eval_reward_fn, x_eval.shape[0] - 1)

    tf_train_env = TFPyEnvironment(environment=train_env)
    tf_eval_env = TFPyEnvironment(environment=eval_env)

    # All specs handed to the agent are taken from the training environment.
    agent = build_agent(
        agent_instance=agent_instance,
        observation_spec=tf_train_env.observation_spec(),
        action_spec=tf_train_env.action_spec(),
        time_step_spec=tf_train_env.time_step_spec(),
        env_batch_size=tf_train_env.batch_size,
        checkpoint_filepath=checkpoint_filepath,
        fc_layers=fc_layers,
        conv_layers=conv_layers,
    )
    agent.initialize()

    training_schedule = {
        'train_iterations': train_iterations,
        'eval_episodes': eval_episodes,
        'iterations_per_eval': steps_per_eval,
        'iterations_per_log': steps_per_log,
        'iterations_per_checkpoint': steps_per_checkpoint,
        'save_best_only': save_best_only,
    }
    eval_avg_returns = agent.train(train_env=tf_train_env, eval_env=tf_eval_env, **training_schedule)

    # Metrics are read from the Python-side eval environment, not the TF wrapper.
    return eval_avg_returns, eval_env.get_metrics()

# Building Train Configs

In [11]:
# Dataset label -> value passed to `train` as `dataset_filepath`.
datasets_dict = {symbol: symbol for symbol in ('BTC', 'ETH', 'ADA', 'XRP', 'LTC')}

# Reward label -> reward-function class passed to `train` as `reward_fn_instance`.
rewards_dict = dict([
    ('Market-Orders', MarketOrderRF),
    ('Market-Limit Orders', MarketLimitOrderRF),
])

# Per-agent settings; each entry is unpacked into `train(**...)`.
agents_configs = {
    'PPO': dict(
        agent_instance=PPOAgent,
        train_iterations=1000,
        steps_per_eval=10,
        steps_per_log=10,
        steps_per_checkpoint=10,
    ),
    'DDQN': dict(
        agent_instance=DQNAgent,
        train_iterations=50000,
        steps_per_eval=500,
        steps_per_log=500,
        steps_per_checkpoint=500,
    ),
}

# Settings shared by every run, also unpacked into `train(**...)`.
train_dict = dict(
    timeframe_size=12,
    target_horizon_len=20,
    num_eval_samples=2250,
    fees=0.01,
    fc_layers=[256, 256],
    conv_layers=[(32, 3, 1)],
    train_episode_steps=100,
    eval_episodes=1,
    save_best_only=True,
)

# Run TraderNet Experiments

In [12]:
# results[agent][dataset][reward_fn] = (eval_avg_returns, eval_metrics).
# Keyed from `agents_configs` so every agent the loop visits has a slot.
results = {
    agent_name: {dataset_name: {} for dataset_name in datasets_dict}
    for agent_name in agents_configs
}

colors = {
    'BTC': 'green',
    'ETH': 'blue',
    'XRP': 'red',
    'ADA': 'black',
    'LTC': 'orange'
}
linestyles = {
    'Market-Orders': '--',
    # Same spelling as the `rewards_dict` key, so lookups by reward name work.
    'Market-Limit Orders': '-',
    # Previous spelling, kept as an alias for any code that still uses it.
    'Market-Limit-Orders': '-'
}

for agent_name, agent_config in agents_configs.items():
    # Create the CSV output directory before training starts, so a missing or
    # unwritable directory fails immediately instead of after the runs.
    os.makedirs(f'experiments/smurf/{agent_name}', exist_ok=True)

    for dataset_name, dataset_filepath in datasets_dict.items():
        for reward_fn_name, reward_fn_instance in rewards_dict.items():
            # Re-seed before every run so each experiment starts from the same
            # random state. NumPy is seeded too, in case the environment or
            # agent draws from NumPy's global RNG (not visible from this cell).
            np.random.seed(0)
            tf.random.set_seed(seed=0)

            train_params = {
                'dataset_filepath': dataset_filepath,
                'reward_fn_instance': reward_fn_instance,
                'checkpoint_filepath': f'database/storage/checkpoints/experiments/smurf/{agent_name}/{dataset_name}/{reward_fn_name}/',
                **train_dict,
                **agent_config
            }
            eval_avg_returns, eval_metrics = train(**train_params)

            results[agent_name][dataset_name][reward_fn_name] = (eval_avg_returns, eval_metrics)

        # Persist one CSV per (agent, dataset, reward function).
        for reward_fn_name, reward_fn_results in results[agent_name][dataset_name].items():
            eval_avg_returns, eval_metrics = reward_fn_results

            metrics_dict = {
                # NOTE(review): the stride of 10000 between evaluations is fixed and
                # not derived from `steps_per_eval` - confirm it is the intended
                # x-axis for both agents.
                'steps': [10000*i for i in range(len(eval_avg_returns))],
                'average_returns': eval_avg_returns,
                **{metric.name: metric.episode_metrics for metric in eval_metrics}
            }
            metrics_df = pd.DataFrame(metrics_dict)
            metrics_df.to_csv(f'experiments/smurf/{agent_name}/{dataset_name}_{reward_fn_name}.csv', index=False)

[-0.01853404 -0.00914936  0.0055    ]
[-0.00677133 -0.00911128  0.0055    ]
Collecting Initial Samples...
Training has started...

New best average return found at -16.57178497314453! Saving checkpoint at iteration 0
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.foldr(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.foldr(fn, elems))


Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.foldr(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.foldr(fn, elems))



New best average return found at -12.422480583190918! Saving checkpoint at iteration 500

Iteration: 500
Train Loss: 0.002164715901017189
Average Return: -12.422480583190918

New best average return found at -4.974349498748779! Saving checkpoint at iteration 1000

Iteration: 1000
Train Loss: 0.002368217334151268
Average Return: -4.974349498748779

Iteration: 1500
Train Loss: 0.0034210672602057457
Average Return: -11.239537239074707

Iteration: 2000
Train Loss: 0.0015507049392908812
Average Return: -8.947919845581055

New best average return found at -2.382216691970825! Saving checkpoint at iteration 2500

Iteration: 2500
Train Loss: 0.0031417314894497395
Average Return: -2.382216691970825

New best average return found at 5.06573486328125! Saving checkpoint at iteration 3000

Iteration: 3000
Train Loss: 0.0056132301688194275
Average Return: 5.06573486328125

Iteration: 3500
Train Loss: 0.009303438477218151
Average Return: 2.7069029808044434

Iteration: 4000
Train Loss: 0.0033092633821

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)



Iteration: 20500
Train Loss: 0.006115785799920559
Average Return: 0.6281086802482605

Iteration: 21000
Train Loss: 0.005628121551126242
Average Return: -4.084413051605225

Iteration: 21500
Train Loss: 0.0035359784960746765
Average Return: 7.508200168609619

Iteration: 22000
Train Loss: 0.004787422716617584
Average Return: 3.374601364135742

Iteration: 22500
Train Loss: 0.016201427206397057
Average Return: 2.8670308589935303

Iteration: 23000
Train Loss: 0.010247232392430305
Average Return: -2.7473440170288086

Iteration: 23500
Train Loss: 0.013739490881562233
Average Return: 8.578664779663086

Iteration: 24000
Train Loss: 0.012315332889556885
Average Return: -0.26625263690948486

Iteration: 24500
Train Loss: 0.016522977501153946
Average Return: -1.3087804317474365

Iteration: 25000
Train Loss: 0.0012990799732506275
Average Return: 3.111482858657837

Iteration: 25500
Train Loss: 0.013929511420428753
Average Return: 0.8290021419525146

Iteration: 26000
Train Loss: 0.007917647249996662
A

  return np.exp(average_returns/std_downfall_returns)



Iteration: 14500
Train Loss: 0.012537236325442791
Average Return: -0.3190191984176636

Iteration: 15000
Train Loss: 0.023972511291503906
Average Return: 5.57395601272583

Iteration: 15500
Train Loss: 0.011063171550631523
Average Return: 9.120756149291992

Iteration: 16000
Train Loss: 0.012248865328729153
Average Return: -4.242620468139648

Iteration: 16500
Train Loss: 0.01479065790772438
Average Return: 2.4166364669799805

Iteration: 17000
Train Loss: 0.022776387631893158
Average Return: 9.479940414428711

Iteration: 17500
Train Loss: 0.018192529678344727
Average Return: 6.183734893798828

Iteration: 18000
Train Loss: 0.011010328307747841
Average Return: 3.182417392730713

Iteration: 18500
Train Loss: 0.012737394310534
Average Return: 8.657953262329102

Iteration: 19000
Train Loss: 0.0435476154088974
Average Return: 4.152158260345459

Iteration: 19500
Train Loss: 0.02893104963004589
Average Return: 5.033951759338379

Iteration: 20000
Train Loss: 0.025625063106417656
Average Return: 5.

  return np.exp(average_returns/std_downfall_returns)



Iteration: 48500
Train Loss: 0.06042177602648735
Average Return: 16.117435455322266

Iteration: 49000
Train Loss: 0.04850027710199356
Average Return: 16.54693031311035

Iteration: 49500
Train Loss: 0.07344761490821838
Average Return: 16.248594284057617

Iteration: 50000
Train Loss: 0.09420417249202728
Average Return: 14.750248908996582
[-0.00268924  0.00833656  0.0055    ]
[0.00048856 0.00315544 0.0055    ]
Collecting Initial Samples...
Training has started...

New best average return found at 12.291582107543945! Saving checkpoint at iteration 0

Iteration: 500
Train Loss: 0.00220282468944788
Average Return: -3.736907482147217

Iteration: 1000
Train Loss: 0.002757929265499115
Average Return: 0.6621885299682617

Iteration: 1500
Train Loss: 0.004209998995065689
Average Return: 8.344561576843262

Iteration: 2000
Train Loss: 0.0019672829657793045
Average Return: -1.7098358869552612

Iteration: 2500
Train Loss: 0.0024183657951653004
Average Return: 5.365845203399658

Iteration: 3000
Train 

  return np.exp(average_returns/std_downfall_returns)



Iteration: 23500
Train Loss: 0.0453534796833992
Average Return: 10.824241638183594

Iteration: 24000
Train Loss: 0.08497361093759537
Average Return: 10.749188423156738

Iteration: 24500
Train Loss: 0.04144695773720741
Average Return: 11.465618133544922

Iteration: 25000
Train Loss: 0.0644921362400055
Average Return: 11.526368141174316

Iteration: 25500
Train Loss: 0.02836482599377632
Average Return: 12.08371639251709

Iteration: 26000
Train Loss: 0.02560250088572502
Average Return: 12.655488014221191

Iteration: 26500
Train Loss: 0.05495087429881096
Average Return: 11.808276176452637

Iteration: 27000
Train Loss: 0.034831173717975616
Average Return: 13.371866226196289

Iteration: 27500
Train Loss: 0.045236457139253616
Average Return: 13.369311332702637

Iteration: 28000
Train Loss: 0.08772952854633331
Average Return: 13.747553825378418

Iteration: 28500
Train Loss: 0.0548773854970932
Average Return: 12.388287544250488

Iteration: 29000
Train Loss: 0.029802102595567703
Average Return: 