# Importing Libraries

In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tf_agents.environments.tf_py_environment import TFPyEnvironment

import config
from agents.tfagents.dqn import DQNAgent
from agents.tfagents.ppo import PPOAgent
from environments.environment import TradingEnvironment
from environments.wrappers.tf.tfenv import TFTradingEnvironment
from environments.rewards.marketlimitorder import MarketLimitOrderRF
from metrics.trading.pnl import CumulativeLogReturn
from metrics.trading.risk import InvestmentRisk
from metrics.trading.sharpe import SharpeRatio
from metrics.trading.sortino import SortinoRatio
from metrics.trading.drawdown import MaximumDrawdown



# Building Eval Environments

In [2]:
def build_eval_environments(
        dataset_filepath,
        timeframe_size,
        target_horizon_len,
        num_eval_samples,
        fees,
        reward_fn_instance
):
    """Build the evaluation trading environment for a single dataset.

    The dataset CSV is read, its regression features are min-max scaled and
    cut into sliding windows, and the last ``num_eval_samples`` windows are
    wrapped, together with a matching reward function, into a trading
    environment.

    Args:
        dataset_filepath: Value substituted into ``config.dataset_save_filepath``
            to obtain the CSV path.
        timeframe_size: Number of consecutive rows in each window (one state).
        target_horizon_len: Number of trailing rows that no window may start
            in; also forwarded to the reward function.
            NOTE(review): presumably the reward look-ahead - confirm against
            the reward function implementation.
        num_eval_samples: Number of trailing windows used for evaluation.
        fees: Forwarded to the reward function as ``fees_percentage``.
        reward_fn_instance: Callable (e.g. a reward-function class) invoked
            with keyword arguments ``timeframe_size``, ``target_horizon_len``,
            ``highs``, ``lows``, ``closes`` and ``fees_percentage``; its result
            must expose ``get_reward_fn_shape()``.

    Returns:
        Tuple ``(eval_env, tf_eval_env)``: the ``TFTradingEnvironment`` and the
        ``TFPyEnvironment`` wrapping it.

    Raises:
        AssertionError: If the number of evaluation windows differs from the
            first dimension reported by the reward function.
    """
    # Load the dataset and keep only the model input features.
    dataset_df = pd.read_csv(config.dataset_save_filepath.format(dataset_filepath))
    samples = dataset_df[config.regression_features].to_numpy(dtype=np.float32)

    # Number of sliding windows that fit in the data, and the row index at
    # which the evaluation part begins.
    num_windows = samples.shape[0] - timeframe_size - target_horizon_len + 1
    eval_start = num_windows - num_eval_samples

    # Fit the scaler on the rows before `eval_start` only, then apply the same
    # fitted scaling to the remaining rows.
    scaler = MinMaxScaler(feature_range=(0, 1.0))
    samples[:eval_start] = scaler.fit_transform(samples[:eval_start])
    samples[eval_start:] = scaler.transform(samples[eval_start:])

    # Cut the scaled rows into overlapping windows and keep the trailing ones
    # as evaluation states.
    windows = [samples[start: start + timeframe_size] for start in range(num_windows)]
    inputs = np.float32(windows)
    x_eval = inputs[inputs.shape[0] - num_eval_samples:]

    # Raw (unscaled) price columns for the reward function, starting at the
    # same row as the evaluation part.
    price_series = {
        column: dataset_df[column].to_numpy(dtype=np.float32)[eval_start:]
        for column in ('high', 'low', 'close')
    }
    eval_reward_fn = reward_fn_instance(
        timeframe_size=timeframe_size,
        target_horizon_len=target_horizon_len,
        highs=price_series['high'],
        lows=price_series['low'],
        closes=price_series['close'],
        fees_percentage=fees
    )

    # States and rewards must line up one-to-one.
    assert x_eval.shape[0] == eval_reward_fn.get_reward_fn_shape()[0], \
        f'AssertionError: DimensionMismatch: x_eval: {x_eval.shape}, eval_reward_fn: {eval_reward_fn.get_reward_fn_shape()}'

    episode_metrics = [
        CumulativeLogReturn(),
        InvestmentRisk(),
        SharpeRatio(),
        SortinoRatio(),
        MaximumDrawdown()
    ]
    trading_env = TradingEnvironment(env_config={
        'states': x_eval,
        'reward_fn': eval_reward_fn,
        'episode_steps': x_eval.shape[0] - 1,
        'metrics': episode_metrics
    })
    eval_env = TFTradingEnvironment(env=trading_env)
    tf_eval_env = TFPyEnvironment(environment=eval_env)
    return eval_env, tf_eval_env

# Building & Loading Agents

In [3]:
def build_agent(
        agent_instance,
        env,
        checkpoint_filepath,
        fc_layers,
        conv_layers
):
    """Construct an agent for ``env`` and initialize it.

    Args:
        agent_instance: Callable (e.g. an agent class) that accepts the
            keyword arguments assembled below and returns an object with an
            ``initialize()`` method.
        env: Environment exposing ``observation_spec()``, ``action_spec()``,
            ``time_step_spec()`` and ``batch_size``.
        checkpoint_filepath: Forwarded unchanged to the agent.
            NOTE(review): whether ``initialize()`` restores weights from this
            path is not visible here - confirm in the agent classes.
        fc_layers: Forwarded unchanged to the agent.
        conv_layers: Forwarded unchanged to the agent.

    Returns:
        The agent, after ``initialize()`` has been called on it.
    """
    # Collect every constructor argument first so the call site stays short.
    agent_kwargs = dict(
        input_tensor_spec=env.observation_spec(),
        action_spec=env.action_spec(),
        time_step_spec=env.time_step_spec(),
        env_batch_size=env.batch_size,
        checkpoint_filepath=checkpoint_filepath,
        fc_layers=fc_layers,
        conv_layers=conv_layers,
    )
    built_agent = agent_instance(**agent_kwargs)
    built_agent.initialize()
    return built_agent

# Building Eval Method

In [4]:
def eval_tradernet_smurf(tradernet_policy, smurf_policy, tf_env_wrapper):
    """Run one evaluation episode with the smurf policy gating the tradernet policy.

    At every step the smurf policy is queried first. If it returns action 2,
    action 2 is sent to the environment; otherwise the tradernet policy is
    queried and its action is sent instead.

    Args:
        tradernet_policy: Policy exposing ``get_initial_state(batch_size)`` and
            ``action(time_step=..., policy_state=...)`` whose result has an
            ``.action`` attribute.
        smurf_policy: Policy with the same interface as ``tradernet_policy``.
        tf_env_wrapper: Environment exposing ``reset()``, ``step(action)`` and
            ``batch_size``; the returned time steps expose ``is_last()`` and
            ``reward.numpy()``.

    Returns:
        Tuple ``(cumulative_rewards, pnls)``: the sum of all step rewards, and
        a list with one entry per step holding the running sum of the rewards
        from steps whose action was not 2.
    """
    # NOTE(review): action 2 is presumably the "hold" action - confirm against
    # the TradingEnvironment action definition.
    step = tf_env_wrapper.reset()

    # The initial policy states are reused unchanged on every step; the state
    # returned by each `action(...)` call is not fed back.
    tradernet_state = tradernet_policy.get_initial_state(tf_env_wrapper.batch_size)
    smurf_state = smurf_policy.get_initial_state(tf_env_wrapper.batch_size)

    total_reward = 0.0
    running_pnl = 0.0
    pnl_curve = []

    while True:
        if step.is_last():
            break

        smurf_choice = smurf_policy.action(time_step=step, policy_state=smurf_state).action
        if smurf_choice == 2:
            chosen_action = 2
        else:
            # Only consult the tradernet policy when smurf did not pick 2.
            chosen_action = tradernet_policy.action(
                time_step=step,
                policy_state=tradernet_state
            ).action

        step = tf_env_wrapper.step(chosen_action)
        step_reward = step.reward.numpy()[0]
        total_reward += step_reward

        # Rewards of action-2 steps count toward the total but not the PnL curve.
        if chosen_action != 2:
            running_pnl += step_reward
        pnl_curve.append(running_pnl)

    return total_reward, pnl_curve

# Building Configs

In [5]:
# Dataset label -> value passed as `dataset_filepath`; both are the asset symbol.
datasets_dict = {symbol: symbol for symbol in ('BTC', 'ETH', 'ADA', 'XRP', 'LTC')}

# Agent label -> keyword arguments splatted into `build_agent`.
# Both agents use the same layer settings; each entry gets its own list objects.
agent_dict = {
    agent_label: {
        'agent_instance': agent_class,
        'fc_layers': [256, 256],
        'conv_layers': [(32, 3, 1)]
    }
    for agent_label, agent_class in (('PPO', PPOAgent), ('DDQN', DQNAgent))
}

# Keyword arguments splatted into `build_eval_environments`.
env_dict = dict(
    timeframe_size=12,
    target_horizon_len=20,
    num_eval_samples=2250,
    fees=0.007
)

# Used verbatim in checkpoint directories and result file names.
reward_fn_name = 'Market-Limit Orders'

# Running Experiments for TraderNet + Smurf

In [6]:
# Evaluate every (agent, dataset) pair with the smurf-gated tradernet policy
# and store the episode metrics and the cumulative PnL curve as CSV files.
for agent_name, agent_config in agent_dict.items():
    # `DataFrame.to_csv` does not create missing parent directories, so make
    # sure the per-agent output directory exists before anything is saved.
    results_dir = f'experiments/smurf/{agent_name}'
    os.makedirs(results_dir, exist_ok=True)

    for dataset_name, dataset_filepath in datasets_dict.items():
        eval_env, tf_eval_env = build_eval_environments(
            dataset_filepath=dataset_filepath,
            reward_fn_instance=MarketLimitOrderRF,
            **env_dict
        )

        # Both agents are built against the same evaluation environment and
        # differ only in the checkpoint directory they are given.
        tradernet = build_agent(
            env=tf_eval_env,
            checkpoint_filepath=f'database/storage/checkpoints/experiments/tradernet/{agent_name}/{dataset_name}/{reward_fn_name}/',
            **agent_config
        )
        smurf_agent = build_agent(
            env=tf_eval_env,
            checkpoint_filepath=f'database/storage/checkpoints/experiments/smurf/{agent_name}/{dataset_name}/{reward_fn_name}/',
            **agent_config
        )

        # The first return value is the sum of all step rewards of the episode.
        cumulative_reward, pnls = eval_tradernet_smurf(
            tradernet_policy=tradernet.policy,
            smurf_policy=smurf_agent.policy,
            tf_env_wrapper=tf_eval_env
        )

        # The column keeps its historical name 'average_returns' so existing
        # result files and downstream readers stay compatible.
        metrics = {
            'average_returns': [cumulative_reward],
            **{key: [metric] for key, metric in eval_env.get_episode_metrics().items()}
        }
        results_df = pd.DataFrame(metrics)
        results_df.to_csv(f'{results_dir}/{dataset_name}_{reward_fn_name}_metrics.csv', index=False)

        print(results_df, '\n')

        episode_pnls_df = pd.DataFrame(pnls)
        episode_pnls_df.to_csv(f'{results_dir}/{dataset_name}_{reward_fn_name}_eval_cumul_pnls.csv', index=False)

        print(episode_pnls_df.tail(5))

  return np.exp(average_returns/std_returns)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)


   average_returns  Cumulative Log Returns  Investment Risk  Sharpe  Sortino  \
0       -34.009088                     0.0                0     NaN      NaN   

   Maximum Drawdown  
0                 0   

        0
2244  0.0
2245  0.0
2246  0.0
2247  0.0
2248  0.0
   average_returns  Cumulative Log Returns  Investment Risk    Sharpe  \
0        35.808246               35.808246         0.331258  1.776735   

    Sortino  Maximum Drawdown  
0  54.24813          0.112173   

              0
2244  35.782564
2245  35.776924
2246  35.771749
2247  35.786066
2248  35.808246
   average_returns  Cumulative Log Returns  Investment Risk    Sharpe  \
0        -2.324393                19.50032         0.325718  1.537387   

    Sortino  Maximum Drawdown  
0  8.341776        203.941111   

             0
2244  19.50032
2245  19.50032
2246  19.50032
2247  19.50032
2248  19.50032
   average_returns  Cumulative Log Returns  Investment Risk    Sharpe  \
0        19.905309               32.385187      























   average_returns  Cumulative Log Returns  Investment Risk    Sharpe  \
0        17.658513               26.034626         0.349792  1.609989   

    Sortino  Maximum Drawdown  
0  17.91851          0.230554   

              0
2244  26.034626
2245  26.034626
2246  26.034626
2247  26.034626
2248  26.034626
   average_returns  Cumulative Log Returns  Investment Risk    Sharpe  \
0        37.158294               38.576688         0.330631  1.702091   

     Sortino  Maximum Drawdown  
0  80.101707            0.5823   

              0
2244  38.572936
2245  38.566560
2246  38.564796
2247  38.566802
2248  38.576688
   average_returns  Cumulative Log Returns  Investment Risk    Sharpe  \
0        12.706338               24.789818         0.344731  1.588669   

    Sortino  Maximum Drawdown  
0  17.03119          0.160215   

              0
2244  24.759789
2245  24.759789
2246  24.774006
2247  24.793225
2248  24.789818
