# Importing Libraries

In [1]:
import os
from typing import Optional

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tf_agents.environments.tf_py_environment import TFPyEnvironment

import config
from agents.tfagents.dqn import DQNAgent
from agents.tfagents.ppo import PPOAgent
from environments.environment import TradingEnvironment
from environments.wrappers.tf.tfenv import TFRuleTradingEnvironment
from environments.rewards.marketlimitorder import MarketLimitOrderRF
from rules.nconsecutive import NConsecutive
from metrics.trading.pnl import CumulativeLogReturn
from metrics.trading.risk import InvestmentRisk
from metrics.trading.sharpe import SharpeRatio
from metrics.trading.sortino import SortinoRatio
from metrics.trading.drawdown import MaximumDrawdown

# Building Eval Environments

In [2]:
def build_eval_environments(
        dataset_filepath,
        timeframe_size,
        target_horizon_len,
        num_eval_samples,
        fees,
        reward_fn_instance,
        n_consecutive_size: Optional[int]
):
    """Build the evaluation trading environment and its TF wrapper for one dataset.

    The dataset CSV is read, the regression features are min-max scaled (the
    scaler is fitted only on the rows that precede the evaluation segment),
    sliding timeframes are constructed and the last ``num_eval_samples``
    timeframes become the evaluation states.

    Args:
        dataset_filepath: Value substituted into ``config.dataset_save_filepath``
            (via ``str.format``) to locate the dataset CSV.
        timeframe_size: Number of consecutive rows in each state timeframe.
        target_horizon_len: Number of trailing rows left out when building
            timeframes; also forwarded to the reward function.
        num_eval_samples: Number of trailing timeframes used for evaluation.
        fees: Forwarded to the reward function as ``fees_percentage``.
        reward_fn_instance: Callable (class or factory) invoked with
            ``timeframe_size``, ``target_horizon_len``, ``highs``, ``lows``,
            ``closes`` and ``fees_percentage`` keyword arguments.
        n_consecutive_size: Window size of the ``NConsecutive`` rule, or ``None``
            to build the environment without rules.

    Returns:
        Tuple ``(eval_env, tf_eval_env)``: the rule-wrapped Python environment
        and the ``TFPyEnvironment`` wrapping it.

    Raises:
        ValueError: If the dataset has no rows left before the evaluation
            segment once the timeframe/horizon/eval sizes are accounted for.
        AssertionError: If the number of evaluation states does not match the
            first dimension reported by the reward function.
    """
    # Reading dataset
    crypto_dataset_df = pd.read_csv(config.dataset_save_filepath.format(dataset_filepath))
    samples_df = crypto_dataset_df[config.regression_features]
    samples = samples_df.to_numpy(dtype=np.float32)

    # Number of timeframes that can be built, and the index at which the
    # evaluation segment starts. The same offset is used for the scaler split,
    # the train/eval split of the timeframes and the slicing of the prices.
    num_inputs = samples.shape[0] - timeframe_size - target_horizon_len + 1
    eval_start = num_inputs - num_eval_samples
    if eval_start <= 0:
        # A non-positive offset would otherwise be interpreted as a negative
        # slice bound and silently fit the scaler on the wrong rows.
        raise ValueError(
            f'Dataset too small: {samples.shape[0]} rows cannot provide {num_eval_samples} eval samples '
            f'with timeframe_size={timeframe_size} and target_horizon_len={target_horizon_len}'
        )

    # Scaling data: fit on the rows preceding the evaluation segment only,
    # then apply the fitted scaler to the remaining rows.
    scaler = MinMaxScaler(feature_range=(0, 1.0))
    samples[:eval_start] = scaler.fit_transform(samples[:eval_start])
    samples[eval_start:] = scaler.transform(samples[eval_start:])

    # Constructing timeframes and keeping the evaluation part
    inputs = np.float32([samples[i: i + timeframe_size] for i in range(num_inputs)])
    x_eval = inputs[eval_start:]

    # Computing the reward function on the prices aligned with the eval timeframes
    closes = crypto_dataset_df['close'].to_numpy(dtype=np.float32)
    highs = crypto_dataset_df['high'].to_numpy(dtype=np.float32)
    lows = crypto_dataset_df['low'].to_numpy(dtype=np.float32)

    eval_reward_fn = reward_fn_instance(
        timeframe_size=timeframe_size,
        target_horizon_len=target_horizon_len,
        highs=highs[eval_start:],
        lows=lows[eval_start:],
        closes=closes[eval_start:],
        fees_percentage=fees
    )

    assert x_eval.shape[0] == eval_reward_fn.get_reward_fn_shape()[0], \
        f'AssertionError: DimensionMismatch: x_eval: {x_eval.shape}, eval_reward_fn: {eval_reward_fn.get_reward_fn_shape()}'

    eval_env = TFRuleTradingEnvironment(
        env=TradingEnvironment(env_config={
            'states': x_eval,
            'reward_fn': eval_reward_fn,
            'episode_steps': x_eval.shape[0] - 1,
            'metrics': [CumulativeLogReturn(), InvestmentRisk(), SharpeRatio(), SortinoRatio(), MaximumDrawdown()]
        }),
        rules=None if n_consecutive_size is None else [NConsecutive(window_size=n_consecutive_size)]
    )
    tf_eval_env = TFPyEnvironment(environment=eval_env)
    return eval_env, tf_eval_env

# Building & Loading Agents

In [3]:
def build_agent(
        agent_instance,
        env,
        checkpoint_filepath,
        fc_layers,
        conv_layers
):
    """Instantiate an agent from the environment's specs and initialize it.

    Args:
        agent_instance: Callable (agent class or factory) accepting the keyword
            arguments assembled below.
        env: Environment exposing ``observation_spec()``, ``action_spec()``,
            ``time_step_spec()`` and a ``batch_size`` attribute.
        checkpoint_filepath: Forwarded unchanged to the agent constructor.
        fc_layers: Forwarded unchanged to the agent constructor.
        conv_layers: Forwarded unchanged to the agent constructor.

    Returns:
        The constructed agent, after its ``initialize()`` method has been called.
    """
    # Specs are queried from the environment in a fixed order before construction.
    constructor_kwargs = {
        'input_tensor_spec': env.observation_spec(),
        'action_spec': env.action_spec(),
        'time_step_spec': env.time_step_spec(),
        'env_batch_size': env.batch_size,
        'checkpoint_filepath': checkpoint_filepath,
        'fc_layers': fc_layers,
        'conv_layers': conv_layers,
    }
    built_agent = agent_instance(**constructor_kwargs)
    built_agent.initialize()
    return built_agent

# Building the Evaluation Method

In [4]:
def eval(policy, tf_env_wrapper):
    """Run ``policy`` for one episode on ``tf_env_wrapper`` and collect rewards.

    NOTE: this function shadows the built-in ``eval``; the name is kept because
    other cells call it by this name.

    Args:
        policy: Policy exposing ``get_initial_state(batch_size)`` and
            ``action(time_step=..., policy_state=...)``; the returned step must
            expose ``.action`` and ``.state``.
        tf_env_wrapper: Environment exposing ``reset()``, ``step(action)`` and
            ``batch_size``; the returned time steps must expose ``is_last()``
            and a ``reward`` with a ``numpy()`` method.

    Returns:
        Tuple ``(cumulative_rewards, pnls)`` where ``cumulative_rewards`` is the
        sum of the first-batch-element reward over all steps and ``pnls`` is the
        running sum, recorded after every step, of the rewards obtained on
        steps whose action is not ``2``.
    """
    time_step = tf_env_wrapper.reset()
    policy_state = policy.get_initial_state(tf_env_wrapper.batch_size)
    cumulative_rewards = 0.0
    cumulative_pnls = 0.0
    pnls = []

    while not time_step.is_last():
        policy_step = policy.action(time_step=time_step, policy_state=policy_state)
        action = policy_step.action
        # Carry the policy's internal state forward; without this a stateful
        # policy would be fed its initial state on every step.
        policy_state = policy_step.state

        time_step = tf_env_wrapper.step(action)
        # Only the first element of the reward batch is used.
        reward = time_step.reward.numpy()[0]
        cumulative_rewards += reward

        # Action 2 is excluded from the PnL curve.
        # NOTE(review): presumably the hold / no-trade action - confirm against TradingEnvironment.
        if action != 2:
            cumulative_pnls += reward
        pnls.append(cumulative_pnls)
    return cumulative_rewards, pnls

# Building Configs

In [5]:
# Dataset display name -> identifier passed to build_eval_environments (identical here).
datasets_dict = {symbol: symbol for symbol in ('BTC', 'ETH', 'ADA', 'XRP', 'LTC')}

# Reward function display name -> reward function class.
reward_fn_dict = {'Market-Limit Orders': MarketLimitOrderRF}

# Agent display name -> keyword arguments for build_agent. Every agent gets its
# own freshly created layer lists, so the entries never share mutable state.
# NOTE(review): conv_layers tuples are presumably (filters, kernel_size, stride) - confirm against the agent classes.
agent_dict = {
    label: {
        'agent_instance': agent_cls,
        'fc_layers': [256, 256],
        'conv_layers': [(32, 3, 1)]
    }
    for label, agent_cls in (('PPO', PPOAgent), ('DDQN', DQNAgent))
}

# Keyword arguments shared by every build_eval_environments call.
env_dict = dict(
    timeframe_size=12,
    target_horizon_len=20,
    num_eval_samples=2250,
    fees=0.007
)

# Rule display name -> NConsecutive window size (None disables the rule).
n_consecutive_config = {
    'No-N-Consecutive': None,
    **{f'{window}-Consecutive': window for window in range(2, 6)}
}

# Run Experiments for All Agents (PPO, DDQN) - Market-Limit Orders

In [6]:
# Evaluate every (agent, dataset, reward function) combination under each
# N-Consecutive rule setting, then store one metrics table and one cumulative
# PnL table per combination.
for agent_name, agent_config in agent_dict.items():
    for dataset_name, dataset_filepath in datasets_dict.items():
        for reward_fn_name, reward_fn_instance in reward_fn_dict.items():
            # Column name -> list of values, one value per N-Consecutive setting.
            results = {}
            # N-Consecutive setting name -> per-step cumulative PnL list.
            episode_pnls = {}

            for n_consecutive_name, n_consecutive_size in n_consecutive_config.items():
                eval_env, tf_eval_env = build_eval_environments(
                    dataset_filepath=dataset_filepath,
                    reward_fn_instance=reward_fn_instance,
                    n_consecutive_size=n_consecutive_size,
                    **env_dict
                )
                agent = build_agent(
                    env=tf_eval_env,
                    checkpoint_filepath=f'database/storage/checkpoints/experiments/tradernet/{agent_name}/{dataset_name}/{reward_fn_name}/',
                    **agent_config
                )

                # NOTE: the first value returned by eval is the cumulative
                # (summed) reward of the episode; the 'average_returns' column
                # name is kept so the produced CSV schema does not change.
                average_return, pnls = eval(policy=agent.policy, tf_env_wrapper=tf_eval_env)
                metrics = {
                    'n_consecutive': n_consecutive_name,
                    'average_returns': average_return,
                    **eval_env.get_episode_metrics()
                }
                for metric_name, metric_value in metrics.items():
                    results.setdefault(metric_name, []).append(metric_value)
                episode_pnls[n_consecutive_name] = pnls

            results_path = f'experiments/nconsecutive/{agent_name}/{dataset_name}_{reward_fn_name}.csv'
            # Create the output directory up front so a missing folder does not
            # discard the evaluation results when writing the CSV files.
            os.makedirs(os.path.dirname(results_path), exist_ok=True)

            results_df = pd.DataFrame(results)
            results_df.to_csv(results_path, index=False)

            print(results_df, '\n')

            episode_pnls_df = pd.DataFrame(episode_pnls)
            episode_pnls_df.to_csv(f'experiments/nconsecutive/{agent_name}/{dataset_name}_{reward_fn_name}_cumul_pnls.csv', index=False)

            print(episode_pnls_df.tail(5))

























      n_consecutive  average_returns  Cumulative Log Returns  Investment Risk  \
0  No-N-Consecutive        13.173482               13.173482         0.477546   
1     2-Consecutive        11.096010               12.754096         0.475341   
2     3-Consecutive         9.373612               12.356829         0.474100   
3     4-Consecutive         7.669645               11.984006         0.473548   
4     5-Consecutive         5.978210               11.578395         0.472966   

     Sharpe   Sortino  Maximum Drawdown  
0  1.357097  4.475173          0.357828  
1  1.350635  4.256171          0.817935  
2  1.343170  4.071984          2.216883  
3  1.336888  3.917532          1.903167  
4  1.329550  3.759345          0.242472   

      No-N-Consecutive  2-Consecutive  3-Consecutive  4-Consecutive  \
2244         13.170206      11.092734       9.370336       7.666369   
2245         13.159067      11.081594       9.359196       7.655229   
2246         13.153347      11.075875       9.























      n_consecutive  average_returns  Cumulative Log Returns  Investment Risk  \
0  No-N-Consecutive        37.607645               37.967527         0.341551   
1     2-Consecutive         7.098605               28.076876         0.334643   
2     3-Consecutive        -9.646013               22.575620         0.319930   
3     4-Consecutive       -21.461435               18.467766         0.306843   
4     5-Consecutive       -29.829190               15.443058         0.297120   

     Sharpe    Sortino  Maximum Drawdown  
0  1.668653  73.033221          0.328964  
1  1.515023  24.391359          0.570467  
2  1.439638  12.714705          0.681282  
3  1.387812   7.803187          0.947738  
4  1.351851   5.649408          1.241333   

      No-N-Consecutive  2-Consecutive  3-Consecutive  4-Consecutive  \
2244         37.978799       7.466367      -9.274240     -21.089662   
2245         37.974140       7.466367      -9.274240     -21.089662   
2246         37.972376       7.466367   























































































































































































































































































































































































































































































































      n_consecutive  average_returns  Cumulative Log Returns  Investment Risk  \
0  No-N-Consecutive        27.939025               27.939025         0.353935   
1     2-Consecutive        15.494534               23.926457         0.353941   
2     3-Consecutive         7.288262               21.073078         0.354286   
3     4-Consecutive         0.583006               18.767351         0.355304   
4     5-Consecutive        -5.883242               16.541336         0.361246   

     Sharpe    Sortino  Maximum Drawdown  
0  1.701618  24.905018          0.156468  
1  1.624089  15.518031          0.165604  
2  1.575590  11.015303          0.151145  
3  1.529671   8.542355          0.143495  
4  1.486776   6.583300          0.158966   

      No-N-Consecutive  2-Consecutive  3-Consecutive  4-Consecutive  \
2244         27.856676      15.463331       7.257059       0.596085   
2245         27.878817      15.485472       7.279200       0.573944   
2246         27.910505      15.517160   























      n_consecutive  average_returns  Cumulative Log Returns  Investment Risk  \
0  No-N-Consecutive        13.630851               13.858670         0.472033   
1     2-Consecutive         7.932808               12.636576         0.466561   
2     3-Consecutive         3.682927               11.642580         0.463967   
3     4-Consecutive         0.274597               10.822504         0.463642   
4     5-Consecutive        -2.437025               10.118602         0.459600   

     Sharpe   Sortino  Maximum Drawdown  
0  1.375040  4.804385          1.108619  
1  1.358678  4.212637          3.248488  
2  1.342348  3.820261          0.345228  
3  1.329007  3.461268          1.335964  
4  1.316638  3.196487          0.267393   

      No-N-Consecutive  2-Consecutive  3-Consecutive  4-Consecutive  \
2244         13.871340       8.173297       3.923416       0.515086   
2245         13.870484       8.172441       3.922560       0.514230   
2246         13.870613       8.172570       3.















































      n_consecutive  average_returns  Cumulative Log Returns  Investment Risk  \
0  No-N-Consecutive        35.579471               35.579471         0.299689   
1     2-Consecutive        16.544740               29.429938         0.292468   
2     3-Consecutive         2.774051               25.009170         0.282956   
3     4-Consecutive        -7.549252               21.684597         0.272727   
4     5-Consecutive       -15.728318               18.892856         0.264925   

     Sharpe    Sortino  Maximum Drawdown  
0  1.827763  56.085909          0.032735  
1  1.702033  28.811627          0.044559  
2  1.616596  17.822214          0.053198  
3  1.555823  12.072915          0.052935  
4  1.500589   8.468096          0.046800   

      No-N-Consecutive  2-Consecutive  3-Consecutive  4-Consecutive  \
2244         35.591743      16.572659       2.816562      -7.506741   
2245         35.591960      16.572443       2.816346      -7.506957   
2246         35.586785      16.566645   















































      n_consecutive  average_returns  Cumulative Log Returns  Investment Risk  \
0  No-N-Consecutive        30.451432               30.451432         0.347710   
1     2-Consecutive        19.419539               27.012692         0.349668   
2     3-Consecutive        11.249674               24.453766         0.346614   
3     4-Consecutive         4.089714               22.242295         0.344893   
4     5-Consecutive        -2.341996               20.142200         0.339136   

     Sharpe    Sortino  Maximum Drawdown  
0  1.687213  27.495970          0.204019  
1  1.618021  18.513300          0.296522  
2  1.574682  13.837557          0.412193  
3  1.539328  10.856725          0.449732  
4  1.509327   8.672600          0.521950   

      No-N-Consecutive  2-Consecutive  3-Consecutive  4-Consecutive  \
2244         30.353633      19.321739      11.151874       3.991914   
2245         30.371998      19.340105      11.170239       4.010279   
2246         30.388058      19.356165   















































      n_consecutive  average_returns  Cumulative Log Returns  Investment Risk  \
0  No-N-Consecutive        39.946801               39.946801         0.327701   
1     2-Consecutive        20.282069               33.519769         0.314825   
2     3-Consecutive         6.941818               29.025973         0.303990   
3     4-Consecutive        -3.404662               25.345519         0.296703   
4     5-Consecutive       -11.768959               22.516164         0.290786   

     Sharpe    Sortino  Maximum Drawdown  
0  1.721866  93.562140          0.583794  
1  1.638509  45.154039          0.644865  
2  1.583865  28.057331          0.639341  
3  1.533641  18.414313          0.689507  
4  1.494817  13.377073          0.790075   

      No-N-Consecutive  2-Consecutive  3-Consecutive  4-Consecutive  \
2244         39.943050      20.278317       6.931690      -3.416554   
2245         39.936673      20.271941       6.931690      -3.416554   
2246         39.934910      20.270177   















































      n_consecutive  average_returns  Cumulative Log Returns  Investment Risk  \
0  No-N-Consecutive        30.108601               30.108601         0.351267   
1     2-Consecutive        12.532243               24.635082         0.347261   
2     3-Consecutive         0.183770               20.579353         0.347586   
3     4-Consecutive        -9.454737               17.343015         0.347718   
4     5-Consecutive       -18.500548               14.221746         0.348932   

     Sharpe    Sortino  Maximum Drawdown  
0  1.695454  31.782358          0.144869  
1  1.586781  17.352812          0.196469  
2  1.504856  10.839245          0.259714  
3  1.443787   7.357594          0.358452  
4  1.383776   5.153005          0.494051   

      No-N-Consecutive  2-Consecutive  3-Consecutive  4-Consecutive  \
2244         30.071010      12.494652       0.146179      -9.462624   
2245         30.078573      12.502215       0.153741      -9.484765   
2246         30.092790      12.516431   