# Importing Libraries

In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tf_agents.environments.tf_py_environment import TFPyEnvironment

import config
from agents.tfagents.dqn import DQNAgent
from agents.tfagents.ppo import PPOAgent
from environments.environment import TradingEnvironment
from environments.wrappers.tf.tfenv import TFTradingEnvironment
from environments.rewards.marketorder import MarketOrderRF
from environments.rewards.marketlimitorder import MarketLimitOrderRF
from metrics.trading.pnl import CumulativeLogReturn
from metrics.trading.risk import InvestmentRisk
from metrics.trading.sharpe import SharpeRatio
from metrics.trading.sortino import SortinoRatio
from metrics.trading.drawdown import MaximumDrawdown

# Reading Datasets

In [2]:
def read_dataset(
        dataset_filepath,
        timeframe_size,
        target_horizon_len,
        num_eval_samples,
        fees,
        reward_fn_instance
):
    """Load a dataset, scale it, window it and build train/eval reward functions.

    Args:
        dataset_filepath: Value formatted into ``config.dataset_save_filepath``
            to obtain the CSV path that is read.
        timeframe_size: Number of consecutive rows in each input window.
        target_horizon_len: Number of trailing rows reserved after each window;
            forwarded unchanged to the reward function.
        num_eval_samples: Number of trailing windows held out for evaluation.
        fees: Forwarded to the reward function as ``fees_percentage``.
        reward_fn_instance: Callable (the notebook passes reward-function
            classes) invoked with ``timeframe_size``, ``target_horizon_len``,
            ``highs``, ``lows``, ``closes`` and ``fees_percentage``. The object
            it returns must expose ``get_reward_fn_shape()``.

    Returns:
        Tuple ``(x_train, train_reward_fn, x_eval, eval_reward_fn)``.

    Raises:
        ValueError: If the dataset has too few rows to leave at least one
            training window after reserving the evaluation windows.
        AssertionError: If the number of windows does not match the first
            dimension reported by the corresponding reward function.
    """
    # Reading dataset
    crypto_dataset_df = pd.read_csv(config.dataset_save_filepath.format(dataset_filepath))
    samples = crypto_dataset_df[config.regression_features].to_numpy(dtype=np.float32)

    num_samples = samples.shape[0]
    num_windows = num_samples - timeframe_size - target_horizon_len + 1

    # Index of the first evaluation window. It is also the number of training
    # windows and the number of leading rows the scaler is fitted on.
    num_train_inputs = num_windows - num_eval_samples
    if num_train_inputs <= 0:
        # A non-positive split index would turn the slices below into
        # "count from the end" slices and silently produce wrong splits.
        raise ValueError(
            f'Dataset has {num_samples} rows, which is not enough for '
            f'timeframe_size={timeframe_size}, target_horizon_len={target_horizon_len} '
            f'and num_eval_samples={num_eval_samples}'
        )

    # Scaling data: fit on the leading rows only, then apply the fitted
    # scaler to the remaining rows.
    scaler = MinMaxScaler(feature_range=(0, 1.0))
    samples[: num_train_inputs] = scaler.fit_transform(samples[: num_train_inputs])
    samples[num_train_inputs:] = scaler.transform(samples[num_train_inputs:])

    # Constructing timeframes for train, test
    inputs = np.float32([samples[i: i + timeframe_size] for i in range(num_windows)])

    # Splitting inputs to train-test data
    x_train = inputs[: num_train_inputs]
    x_eval = inputs[num_train_inputs:]

    # Computing reward functions for train, test data (prices are left unscaled)
    closes = crypto_dataset_df['close'].to_numpy(dtype=np.float32)
    highs = crypto_dataset_df['high'].to_numpy(dtype=np.float32)
    lows = crypto_dataset_df['low'].to_numpy(dtype=np.float32)

    train_price_end = num_samples - num_eval_samples
    train_reward_fn = reward_fn_instance(
        timeframe_size=timeframe_size,
        target_horizon_len=target_horizon_len,
        highs=highs[: train_price_end],
        lows=lows[: train_price_end],
        closes=closes[: train_price_end],
        fees_percentage=fees
    )

    # Evaluation prices start at the row where the first evaluation window starts.
    eval_reward_fn = reward_fn_instance(
        timeframe_size=timeframe_size,
        target_horizon_len=target_horizon_len,
        highs=highs[num_train_inputs:],
        lows=lows[num_train_inputs:],
        closes=closes[num_train_inputs:],
        fees_percentage=fees
    )

    assert x_train.shape[0] == train_reward_fn.get_reward_fn_shape()[0], \
        f'AssertionError: DimensionMismatch: x_train: {x_train.shape}, train_reward_fn: {train_reward_fn.get_reward_fn_shape()}'
    assert x_eval.shape[0] == eval_reward_fn.get_reward_fn_shape()[0], \
        f'AssertionError: DimensionMismatch: x_eval: {x_eval.shape}, eval_reward_fn: {eval_reward_fn.get_reward_fn_shape()}'

    return x_train, train_reward_fn, x_eval, eval_reward_fn

# Building Agent

In [3]:
def build_agent(
        agent_instance,
        observation_spec,
        action_spec,
        time_step_spec,
        env_batch_size,
        checkpoint_filepath,
        fc_layers,
        conv_layers
):
    """Construct an agent by calling ``agent_instance`` with keyword arguments.

    ``observation_spec`` is handed over under the keyword ``input_tensor_spec``;
    every other argument is forwarded under its own name.

    Returns:
        Whatever ``agent_instance(...)`` returns.
    """
    agent_kwargs = dict(
        input_tensor_spec=observation_spec,
        action_spec=action_spec,
        time_step_spec=time_step_spec,
        env_batch_size=env_batch_size,
        checkpoint_filepath=checkpoint_filepath,
        fc_layers=fc_layers,
        conv_layers=conv_layers,
    )
    return agent_instance(**agent_kwargs)

# Building Trainer

In [4]:
def train(
        dataset_filepath,
        timeframe_size,
        target_horizon_len,
        num_eval_samples,
        fees,
        reward_fn_instance,
        agent_instance,
        checkpoint_filepath,
        fc_layers,
        conv_layers,
        train_episode_steps,
        train_iterations,
        eval_episodes,
        steps_per_eval,
        steps_per_log,
        steps_per_checkpoint,
        save_best_only
):
    """Build train/eval environments and an agent, then run the agent's training.

    The dataset arguments are forwarded to ``read_dataset``; the agent
    arguments are forwarded to ``build_agent``. ``steps_per_eval``,
    ``steps_per_log`` and ``steps_per_checkpoint`` are passed to
    ``agent.train`` as ``iterations_per_eval``, ``iterations_per_log`` and
    ``iterations_per_checkpoint``.

    Returns:
        Tuple ``(eval_avg_returns, eval_metrics)``: the value returned by
        ``agent.train`` and the result of ``get_metrics()`` on the (unwrapped)
        evaluation environment.
    """
    x_train, train_reward_fn, x_eval, eval_reward_fn = read_dataset(
        dataset_filepath=dataset_filepath,
        timeframe_size=timeframe_size,
        target_horizon_len=target_horizon_len,
        num_eval_samples=num_eval_samples,
        fees=fees,
        reward_fn_instance=reward_fn_instance
    )

    def make_trading_env(states, reward_fn, episode_steps):
        # Every environment gets its own, freshly created metric objects.
        env_config = {
            'states': states,
            'reward_fn': reward_fn,
            'episode_steps': episode_steps,
            'metrics': [CumulativeLogReturn(), InvestmentRisk(), SharpeRatio(), SortinoRatio(), MaximumDrawdown()]
        }
        return TFTradingEnvironment(env=TradingEnvironment(env_config=env_config))

    train_env = make_trading_env(x_train, train_reward_fn, train_episode_steps)
    # The evaluation episode length is tied to the number of evaluation windows.
    eval_env = make_trading_env(x_eval, eval_reward_fn, x_eval.shape[0] - 1)

    tf_train_env = TFPyEnvironment(environment=train_env)
    tf_eval_env = TFPyEnvironment(environment=eval_env)

    # Specs and batch size are taken from the wrapped training environment.
    agent = build_agent(
        agent_instance=agent_instance,
        observation_spec=tf_train_env.observation_spec(),
        action_spec=tf_train_env.action_spec(),
        time_step_spec=tf_train_env.time_step_spec(),
        env_batch_size=tf_train_env.batch_size,
        checkpoint_filepath=checkpoint_filepath,
        fc_layers=fc_layers,
        conv_layers=conv_layers,
    )
    agent.initialize()

    training_schedule = {
        'train_iterations': train_iterations,
        'eval_episodes': eval_episodes,
        'iterations_per_eval': steps_per_eval,
        'iterations_per_log': steps_per_log,
        'iterations_per_checkpoint': steps_per_checkpoint,
        'save_best_only': save_best_only,
    }
    eval_avg_returns = agent.train(
        train_env=tf_train_env,
        eval_env=tf_eval_env,
        **training_schedule
    )

    # Metrics are read from the Python-side evaluation environment,
    # not from its TFPyEnvironment wrapper.
    return eval_avg_returns, eval_env.get_metrics()

# Building Train Configs

In [5]:
# Dataset label -> value handed to `train` as `dataset_filepath` (identical here).
datasets_dict = {symbol: symbol for symbol in ('BTC', 'ETH', 'ADA', 'XRP', 'LTC')}

# Reward-function label -> reward-function class.
rewards_dict = dict([
    ('Market-Orders', MarketOrderRF),
    ('Market-Limit Orders', MarketLimitOrderRF),
])

# Per-agent settings; each entry is merged into the keyword arguments of `train`.
agents_configs = {
    'DDQN': dict(
        agent_instance=DQNAgent,
        train_iterations=50000,
        steps_per_eval=500,
        steps_per_log=500,
        steps_per_checkpoint=500,
    ),
    'PPO': dict(
        agent_instance=PPOAgent,
        train_iterations=1000,
        steps_per_eval=10,
        steps_per_log=10,
        steps_per_checkpoint=10,
    ),
}

# Settings shared by every experiment; also merged into the `train` keyword arguments.
train_dict = dict(
    timeframe_size=12,
    target_horizon_len=20,
    num_eval_samples=2250,
    fees=0.007,
    fc_layers=[256, 256],
    conv_layers=[(32, 3, 1)],
    train_episode_steps=100,
    eval_episodes=1,
    save_best_only=True,
)

# Run TraderNet Experiments

In [6]:
# results[agent_name][dataset_name][reward_fn_name] -> (eval_avg_returns, eval_metrics)
results = {
    'PPO': {dataset_name: {} for dataset_name in datasets_dict.keys()},
    'DDQN': {dataset_name: {} for dataset_name in datasets_dict.keys()}
}

colors = {
    'BTC': 'green',
    'ETH': 'blue',
    'XRP': 'red',
    'ADA': 'black',
    'LTC': 'orange'
}
linestyles = {
    'Market-Orders': '--',
    # Matches the key used in `rewards_dict`, so lookups by reward_fn_name work.
    'Market-Limit Orders': '-',
    # Original spelling, kept so any existing lookup by this key still works.
    'Market-Limit-Orders': '-'
}

for agent_name, agent_config in agents_configs.items():
    for dataset_name, dataset_filepath in datasets_dict.items():
        for reward_fn_name, reward_fn_instance in rewards_dict.items():
            # Re-seed TensorFlow before every run.
            # NOTE(review): only TensorFlow is seeded here; if the environment or
            # agents draw from NumPy / `random`, runs may still vary - confirm.
            tf.random.set_seed(seed=0)

            train_params = {
                'dataset_filepath': dataset_filepath,
                'reward_fn_instance': reward_fn_instance,
                'checkpoint_filepath': f'database/storage/checkpoints/experiments/tradernet/{agent_name}/{dataset_name}/{reward_fn_name}/',
                **train_dict,
                **agent_config
            }
            eval_avg_returns, eval_metrics = train(**train_params)

            results[agent_name][dataset_name][reward_fn_name] = (eval_avg_returns, eval_metrics)

            # Persist this run's metrics right away, so a failure in a later
            # (long) training run cannot discard results that already exist.
            metrics_dict = {
                # NOTE(review): the step axis uses a fixed 10000 per evaluation point
                # for every agent, while `steps_per_eval` is 500 (DDQN) / 10 (PPO);
                # confirm 10000 is the intended number of steps per evaluation.
                'steps': [10000*i for i in range(len(eval_avg_returns))],
                'average_returns': eval_avg_returns,
                **{metric.name: metric.episode_metrics for metric in eval_metrics}
            }
            metrics_df = pd.DataFrame(metrics_dict)

            csv_filepath = f'experiments/tradernet/{agent_name}/{dataset_name}_{reward_fn_name}.csv'
            # `to_csv` does not create missing parent directories.
            os.makedirs(os.path.dirname(csv_filepath), exist_ok=True)
            metrics_df.to_csv(csv_filepath, index=False)

Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.


Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.


Collecting Initial Samples...
Training has started...

New best average return found at -3.3251941204071045! Saving checkpoint at iteration 0
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.foldr(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.foldr(fn, elems))


Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.foldr(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.foldr(fn, elems))



New best average return found at -2.6267342567443848! Saving checkpoint at iteration 500

Iteration: 500
Train Loss: 0.004517136141657829
Average Return: -2.6267342567443848

New best average return found at -2.1988868713378906! Saving checkpoint at iteration 1000

Iteration: 1000
Train Loss: 0.002659676130861044
Average Return: -2.1988868713378906

New best average return found at -1.1928369998931885! Saving checkpoint at iteration 1500

Iteration: 1500
Train Loss: 0.00444418378174305
Average Return: -1.1928369998931885

Iteration: 2000
Train Loss: 0.003950432874262333
Average Return: -1.593271017074585

New best average return found at -0.10673896968364716! Saving checkpoint at iteration 2500

Iteration: 2500
Train Loss: 0.003947971388697624
Average Return: -0.10673896968364716

Iteration: 3000
Train Loss: 0.0027603469789028168
Average Return: -6.610193252563477

Iteration: 3500
Train Loss: 0.003471020143479109
Average Return: -0.64287930727005

New best average return found at 0.26

  return np.exp(average_returns/std_downfall_returns)



Iteration: 14500
Train Loss: 0.00968550518155098
Average Return: 4.744236946105957

Iteration: 15000
Train Loss: 0.030995724722743034
Average Return: 12.404884338378906

Iteration: 15500
Train Loss: 0.01618315279483795
Average Return: 11.38774299621582

Iteration: 16000
Train Loss: 0.017214620485901833
Average Return: 12.028968811035156

Iteration: 16500
Train Loss: 0.020484738051891327
Average Return: 11.729836463928223

Iteration: 17000
Train Loss: 0.021418271586298943
Average Return: 9.963153839111328

Iteration: 17500
Train Loss: 0.020487137138843536
Average Return: 8.861655235290527

Iteration: 18000
Train Loss: 0.017623163759708405
Average Return: 9.623153686523438

Iteration: 18500
Train Loss: 0.316775918006897
Average Return: 11.739766120910645

Iteration: 19000
Train Loss: 0.03245686739683151
Average Return: 12.01941967010498

Iteration: 19500
Train Loss: 0.03994602710008621
Average Return: 9.00653076171875

Iteration: 20000
Train Loss: 0.04353069141507149
Average Return: 0.5

  return np.exp(average_returns/std_downfall_returns)


Training has started...

New best average return found at 26.674510955810547! Saving checkpoint at iteration 0

New best average return found at 34.36062240600586! Saving checkpoint at iteration 500

Iteration: 500
Train Loss: 0.014842865988612175
Average Return: 34.36062240600586

Iteration: 1000
Train Loss: 0.009296132251620293
Average Return: 33.677608489990234

Iteration: 1500
Train Loss: 0.006143459118902683
Average Return: 33.426048278808594

Iteration: 2000
Train Loss: 0.006470206193625927
Average Return: 32.41636657714844

Iteration: 2500
Train Loss: 0.016628220677375793
Average Return: 31.235071182250977

Iteration: 3000
Train Loss: 0.0131447222083807
Average Return: 34.02251434326172

Iteration: 3500
Train Loss: 0.019370481371879578
Average Return: 32.952945709228516

New best average return found at 35.579505920410156! Saving checkpoint at iteration 4000

Iteration: 4000
Train Loss: 0.02315877191722393
Average Return: 35.579505920410156

Iteration: 4500
Train Loss: 0.0219992

  return np.exp(average_returns/std_downfall_returns)



Iteration: 29500
Train Loss: 0.039734918624162674
Average Return: 15.872835159301758

Iteration: 30000
Train Loss: 0.0404592864215374
Average Return: 26.861629486083984

Iteration: 30500
Train Loss: 0.03974232077598572
Average Return: 28.027565002441406

Iteration: 31000
Train Loss: 0.12504422664642334
Average Return: 25.17815399169922

Iteration: 31500
Train Loss: 0.11980539560317993
Average Return: 22.144664764404297

Iteration: 32000
Train Loss: 0.03582223504781723
Average Return: 29.08325958251953

Iteration: 32500
Train Loss: 0.06847092509269714
Average Return: 31.308080673217773

Iteration: 33000
Train Loss: 0.045389141887426376
Average Return: 24.97724151611328

Iteration: 33500
Train Loss: 0.053193941712379456
Average Return: 27.473323822021484

Iteration: 34000
Train Loss: 0.026069993153214455
Average Return: 24.416536331176758

Iteration: 34500
Train Loss: 0.026640374213457108
Average Return: 24.8795223236084

Iteration: 35000
Train Loss: 0.06758280098438263
Average Return: 

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)



Iteration: 48500
Train Loss: 0.05505954474210739
Average Return: 19.10175323486328

Iteration: 49000
Train Loss: 0.05312485992908478
Average Return: 20.285017013549805

Iteration: 49500
Train Loss: 0.07160315662622452
Average Return: 19.44167137145996

Iteration: 50000
Train Loss: 0.1190890371799469
Average Return: 19.467395782470703
Collecting Initial Samples...
Training has started...

New best average return found at -44.11851119995117! Saving checkpoint at iteration 0

New best average return found at 9.697697639465332! Saving checkpoint at iteration 500

Iteration: 500
Train Loss: 0.0030540823936462402
Average Return: 9.697697639465332

New best average return found at 11.083016395568848! Saving checkpoint at iteration 1000

Iteration: 1000
Train Loss: 0.006096345372498035
Average Return: 11.083016395568848

New best average return found at 14.647705078125! Saving checkpoint at iteration 1500

Iteration: 1500
Train Loss: 0.005026837810873985
Average Return: 14.647705078125

Itera

  return np.exp(average_returns/std_downfall_returns)



Iteration: 42500
Train Loss: 0.07082706689834595
Average Return: 20.564373016357422

Iteration: 43000
Train Loss: 0.07105065137147903
Average Return: 17.692092895507812

Iteration: 43500
Train Loss: 0.09303058683872223
Average Return: 15.988404273986816

Iteration: 44000
Train Loss: 0.03563743084669113
Average Return: 14.372461318969727

Iteration: 44500
Train Loss: 0.0445047952234745
Average Return: 23.42290496826172

Iteration: 45000
Train Loss: 0.044997043907642365
Average Return: 13.917854309082031

Iteration: 45500
Train Loss: 0.030068514868617058
Average Return: 15.403700828552246

Iteration: 46000
Train Loss: 0.039457205682992935
Average Return: 18.324506759643555

Iteration: 46500
Train Loss: 0.054179057478904724
Average Return: 18.779537200927734

Iteration: 47000
Train Loss: 0.051116637885570526
Average Return: 17.13608169555664

Iteration: 47500
Train Loss: 0.05406451225280762
Average Return: 16.363452911376953

Iteration: 48000
Train Loss: 0.03296280279755592
Average Retur

  return np.exp(average_returns/std_downfall_returns)



New best average return found at 39.94670104980469! Saving checkpoint at iteration 2500

Iteration: 2500
Train Loss: 0.034266166388988495
Average Return: 39.94670104980469

Iteration: 3000
Train Loss: 0.005774769000709057
Average Return: 29.007732391357422

Iteration: 3500
Train Loss: 0.007667203433811665
Average Return: 29.335298538208008

Iteration: 4000
Train Loss: 0.06139087677001953
Average Return: 32.82987976074219

Iteration: 4500
Train Loss: 0.024734649807214737
Average Return: 34.1562614440918

Iteration: 5000
Train Loss: 0.013552075251936913
Average Return: 32.62443542480469

Iteration: 5500
Train Loss: 0.028917577117681503
Average Return: 32.91023635864258

Iteration: 6000
Train Loss: 0.03204839304089546
Average Return: 34.35884475708008

Iteration: 6500
Train Loss: 0.0349116176366806
Average Return: 34.153526306152344

Iteration: 7000
Train Loss: 0.04254339635372162
Average Return: 28.3703670501709

Iteration: 7500
Train Loss: 0.052191294729709625
Average Return: 30.228717

  return np.exp(average_returns/std_downfall_returns)



Iteration: 44500
Train Loss: 0.04125214368104935
Average Return: 15.408001899719238

Iteration: 45000
Train Loss: 0.02894483134150505
Average Return: 21.040027618408203

Iteration: 45500
Train Loss: 0.04462943598628044
Average Return: 19.959476470947266

Iteration: 46000
Train Loss: 0.08680948615074158
Average Return: 20.964540481567383

Iteration: 46500
Train Loss: 0.042590994387865067
Average Return: 14.944565773010254

Iteration: 47000
Train Loss: 0.03898043930530548
Average Return: 18.49053955078125

Iteration: 47500
Train Loss: 0.03178520128130913
Average Return: 19.17402458190918

Iteration: 48000
Train Loss: 0.0307757668197155
Average Return: 16.51491355895996

Iteration: 48500
Train Loss: 0.028123019263148308
Average Return: 20.195476531982422

Iteration: 49000
Train Loss: 0.030516181141138077
Average Return: 19.901241302490234

Iteration: 49500
Train Loss: 0.046743009239435196
Average Return: 20.680295944213867

Iteration: 50000
Train Loss: 0.02945210412144661
Average Return:

  return np.exp(average_returns/std_downfall_returns)



Iteration: 10000
Train Loss: 0.023667778819799423
Average Return: 25.605226516723633

Iteration: 10500
Train Loss: 0.04919448122382164
Average Return: 22.970861434936523

Iteration: 11000
Train Loss: 0.04298529401421547
Average Return: 25.2415771484375

Iteration: 11500
Train Loss: 0.014252390712499619
Average Return: 20.440105438232422

Iteration: 12000
Train Loss: 0.04621461033821106
Average Return: 24.700584411621094

Iteration: 12500
Train Loss: 0.01949145272374153
Average Return: 21.308048248291016

Iteration: 13000
Train Loss: 0.020260967314243317
Average Return: 22.767114639282227

Iteration: 13500
Train Loss: 0.05006321519613266
Average Return: 25.912738800048828

Iteration: 14000
Train Loss: 0.052836641669273376
Average Return: 22.558948516845703

Iteration: 14500
Train Loss: 0.013921651989221573
Average Return: 22.873308181762695

Iteration: 15000
Train Loss: 0.05262694135308266
Average Return: 21.38837432861328

Iteration: 15500
Train Loss: 0.06189469248056412
Average Retur

  return np.exp(average_returns/std_downfall_returns)



Iteration: 46000
Train Loss: 0.08623255044221878
Average Return: 19.810997009277344

Iteration: 46500
Train Loss: 0.0432465597987175
Average Return: 17.709928512573242

Iteration: 47000
Train Loss: 0.02915484458208084
Average Return: 14.909781455993652

Iteration: 47500
Train Loss: 0.05309150367975235
Average Return: 19.572769165039062

Iteration: 48000
Train Loss: 0.06309910863637924
Average Return: 19.79684829711914

Iteration: 48500
Train Loss: 0.051611725240945816
Average Return: 20.55994415283203

Iteration: 49000
Train Loss: 0.0620298907160759
Average Return: 17.001340866088867

Iteration: 49500
Train Loss: 0.06673221290111542
Average Return: 20.259021759033203

Iteration: 50000
Train Loss: 0.04690238833427429
Average Return: 17.817033767700195
Training has started...


  return np.exp(average_returns/std_returns)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)



New best average return found at -24.56235694885254! Saving checkpoint at iteration 0
Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=True)` instead.


Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=True)` instead.



New best average return found at -9.082357406616211! Saving checkpoint at iteration 10

Iteration: 10
Train Loss: 76.96861267089844
Average Return: -9.082357406616211

New best average return found at -7.86840295791626! Saving checkpoint at iteration 20

Iteration: 20
Train Loss: 103.05207824707031
Average Return: -7.86840295791626

New best average return found at -2.489945888519287! Saving checkpoint at iteration 30

Iteration: 30
Train Loss: 2.7145440578460693
Average Return: -2.489945888519287

Iteration: 40
Train Loss: 35.020503997802734
Average Return: -3.2149956226348877

New best average return found at -2.186483860015869! Saving checkpoint at iteration 50

Iteration: 50
Train Loss: 10.051138877868652
Average Return: -2.186483860015869

Iteration: 60
Train Loss: 3.69128155708313
Average Return: -2.2240147590637207

New best average return found at -0.3787486255168915! Saving checkpoint at iteration 70

Iteration: 70
Train Loss: 5.146501064300537
Average Return: -0.378748625516

  return np.exp(average_returns/std_downfall_returns)



Iteration: 170
Train Loss: 52.83454513549805
Average Return: -5.25177001953125

Iteration: 180
Train Loss: 4.874590873718262
Average Return: -5.434153079986572

Iteration: 190
Train Loss: 6.426185607910156
Average Return: -4.4396185874938965

Iteration: 200
Train Loss: 19.11383056640625
Average Return: -4.696266174316406

Iteration: 210
Train Loss: 22.2358341217041
Average Return: -3.511338710784912

Iteration: 220
Train Loss: 5.90990686416626
Average Return: -3.7045938968658447

Iteration: 230
Train Loss: 63.00525665283203
Average Return: -4.930360794067383

Iteration: 240
Train Loss: 29.741174697875977
Average Return: -5.014069080352783

Iteration: 250
Train Loss: 8.847372055053711
Average Return: -5.560515880584717

Iteration: 260
Train Loss: 13.609344482421875
Average Return: -4.795721054077148

Iteration: 270
Train Loss: 11.227121353149414
Average Return: -3.153813362121582

Iteration: 280
Train Loss: 8.795845031738281
Average Return: -2.8686413764953613

Iteration: 290
Train Los

  return np.exp(average_returns/std_returns)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)



New best average return found at -34.00912094116211! Saving checkpoint at iteration 0

New best average return found at 5.621676921844482! Saving checkpoint at iteration 10

Iteration: 10
Train Loss: 0.46018218994140625
Average Return: 5.621676921844482

Iteration: 20
Train Loss: 1.4222354888916016
Average Return: 4.73570442199707

Iteration: 30
Train Loss: 0.04966241866350174
Average Return: 2.025658369064331

New best average return found at 6.530117511749268! Saving checkpoint at iteration 40

Iteration: 40
Train Loss: 0.918865442276001
Average Return: 6.530117511749268

New best average return found at 7.718655586242676! Saving checkpoint at iteration 50

Iteration: 50
Train Loss: 0.5210205316543579
Average Return: 7.718655586242676

Iteration: 60
Train Loss: 0.8800739049911499
Average Return: 1.753743052482605

Iteration: 70
Train Loss: 1.7316622734069824
Average Return: 1.1272082328796387

Iteration: 80
Train Loss: 1.0698623657226562
Average Return: -0.36870265007019043

Iterati

  return np.exp(average_returns/std_downfall_returns)



Iteration: 100
Train Loss: 0.5158969163894653
Average Return: -6.293537616729736

Iteration: 110
Train Loss: 1.699774980545044
Average Return: 6.281260967254639

New best average return found at 9.275382041931152! Saving checkpoint at iteration 120

Iteration: 120
Train Loss: 2.34633469581604
Average Return: 9.275382041931152

Iteration: 130
Train Loss: 0.3292926847934723
Average Return: 8.948965072631836

New best average return found at 9.87955093383789! Saving checkpoint at iteration 140

Iteration: 140
Train Loss: 3.070344924926758
Average Return: 9.87955093383789

New best average return found at 9.947980880737305! Saving checkpoint at iteration 150

Iteration: 150
Train Loss: 0.9939121603965759
Average Return: 9.947980880737305

Iteration: 160
Train Loss: 0.20012550055980682
Average Return: 5.086240768432617

Iteration: 170
Train Loss: 2.1696181297302246
Average Return: 6.095435619354248

Iteration: 180
Train Loss: 0.7126433849334717
Average Return: 2.5526211261749268

Iteration

  return np.exp(average_returns/std_returns)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)



New best average return found at -48.64369201660156! Saving checkpoint at iteration 0

New best average return found at 0.40528222918510437! Saving checkpoint at iteration 10

Iteration: 10
Train Loss: 70.51688385009766
Average Return: 0.40528222918510437

New best average return found at 13.098921775817871! Saving checkpoint at iteration 20

Iteration: 20
Train Loss: 17.186201095581055
Average Return: 13.098921775817871

Iteration: 30
Train Loss: 13.850643157958984
Average Return: 9.405078887939453

Iteration: 40
Train Loss: 5.288665771484375
Average Return: 6.276732921600342

Iteration: 50
Train Loss: 3.398315668106079
Average Return: 10.637572288513184

Iteration: 60
Train Loss: 6.0303144454956055
Average Return: 7.900406837463379

Iteration: 70
Train Loss: 176.51661682128906
Average Return: -0.5921937823295593

Iteration: 80
Train Loss: 16.269285202026367
Average Return: 9.368404388427734

Iteration: 90
Train Loss: 31.27381706237793
Average Return: 7.797119617462158

Iteration: 10

  return np.exp(average_returns/std_returns)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)



New best average return found at -65.24463653564453! Saving checkpoint at iteration 0

New best average return found at 31.52944564819336! Saving checkpoint at iteration 10

Iteration: 10
Train Loss: 30.882476806640625
Average Return: 31.52944564819336

Iteration: 20
Train Loss: 3.7421491146087646
Average Return: 12.899675369262695

New best average return found at 31.550079345703125! Saving checkpoint at iteration 30

Iteration: 30
Train Loss: 15.2725248336792
Average Return: 31.550079345703125

Iteration: 40
Train Loss: 14.822991371154785
Average Return: 18.04451560974121

Iteration: 50
Train Loss: 1.8637970685958862
Average Return: 25.020660400390625

Iteration: 60
Train Loss: 5.907175064086914
Average Return: 26.850509643554688

Iteration: 70
Train Loss: 128.55764770507812
Average Return: 24.307260513305664

Iteration: 80
Train Loss: 13.327314376831055
Average Return: 28.896926879882812

Iteration: 90
Train Loss: 13.951639175415039
Average Return: 28.272659301757812

Iteration: 10

  return np.exp(average_returns/std_downfall_returns)



New best average return found at 34.63430404663086! Saving checkpoint at iteration 220

Iteration: 220
Train Loss: 17.594863891601562
Average Return: 34.63430404663086

Iteration: 230
Train Loss: 86.07440948486328
Average Return: 33.724552154541016

Iteration: 240
Train Loss: 30.18636131286621
Average Return: 31.809232711791992

Iteration: 250
Train Loss: 23.029687881469727
Average Return: 31.708229064941406

Iteration: 260
Train Loss: 19.543594360351562
Average Return: 29.641963958740234

Iteration: 270
Train Loss: 9.848798751831055
Average Return: 29.528423309326172

Iteration: 280
Train Loss: 16.52969741821289
Average Return: 23.857404708862305

Iteration: 290
Train Loss: 8.878270149230957
Average Return: 27.526376724243164

Iteration: 300
Train Loss: 56.107269287109375
Average Return: 27.303577423095703

Iteration: 310
Train Loss: 109.92997741699219
Average Return: 27.830976486206055

Iteration: 320
Train Loss: 7.256505489349365
Average Return: 29.732585906982422

Iteration: 330
T

  return np.exp(average_returns/std_downfall_returns)



Iteration: 700
Train Loss: 18.130949020385742
Average Return: 32.86817169189453

Iteration: 710
Train Loss: 13.423917770385742
Average Return: 33.56792449951172

Iteration: 720
Train Loss: 20.193578720092773
Average Return: 33.57172393798828

Iteration: 730
Train Loss: 9.036803245544434
Average Return: 34.95358657836914

Iteration: 740
Train Loss: 10.679986953735352
Average Return: 34.899288177490234

New best average return found at 35.236839294433594! Saving checkpoint at iteration 750

Iteration: 750
Train Loss: 5.837555885314941
Average Return: 35.236839294433594

Iteration: 760
Train Loss: 23.15612030029297
Average Return: 35.08951187133789

Iteration: 770
Train Loss: 17.101131439208984
Average Return: 33.62620162963867

Iteration: 780
Train Loss: 13.467620849609375
Average Return: 33.17713165283203

Iteration: 790
Train Loss: 17.285812377929688
Average Return: 33.3601188659668

Iteration: 800
Train Loss: 10.91231632232666
Average Return: 33.16476821899414

Iteration: 810
Train L

  return np.exp(average_returns/std_downfall_returns)



Iteration: 310
Train Loss: 26.124069213867188
Average Return: 26.255859375

Iteration: 320
Train Loss: 41.37175369262695
Average Return: 26.070159912109375

Iteration: 330
Train Loss: 106.23200988769531
Average Return: 26.699647903442383

Iteration: 340
Train Loss: 47.64500045776367
Average Return: 26.317041397094727

Iteration: 350
Train Loss: 19.235759735107422
Average Return: 25.577198028564453

Iteration: 360
Train Loss: 19.773860931396484
Average Return: 25.15020179748535

Iteration: 370
Train Loss: 14.424314498901367
Average Return: 26.3021240234375

Iteration: 380
Train Loss: 40.704532623291016
Average Return: 25.834980010986328

Iteration: 390
Train Loss: 21.924833297729492
Average Return: 25.750274658203125

Iteration: 400
Train Loss: 26.356698989868164
Average Return: 26.177915573120117

Iteration: 410
Train Loss: 50.576873779296875
Average Return: 25.78809928894043

Iteration: 420
Train Loss: 17.180110931396484
Average Return: 24.58803367614746

Iteration: 430
Train Loss: 4

  return np.exp(average_returns/std_returns)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)



New best average return found at -53.16938018798828! Saving checkpoint at iteration 0

New best average return found at 7.0066633224487305! Saving checkpoint at iteration 10

Iteration: 10
Train Loss: 16.020870208740234
Average Return: 7.0066633224487305

New best average return found at 9.603474617004395! Saving checkpoint at iteration 20

Iteration: 20
Train Loss: 21.447486877441406
Average Return: 9.603474617004395

Iteration: 30
Train Loss: 7.572327613830566
Average Return: 5.982873916625977

Iteration: 40
Train Loss: 240.0513153076172
Average Return: 0.8460615277290344

Iteration: 50
Train Loss: 3.418628454208374
Average Return: 5.343705177307129

New best average return found at 11.387880325317383! Saving checkpoint at iteration 60

Iteration: 60
Train Loss: 9.410750389099121
Average Return: 11.387880325317383

New best average return found at 13.292974472045898! Saving checkpoint at iteration 70

Iteration: 70
Train Loss: 249.70150756835938
Average Return: 13.292974472045898

N

  return np.exp(average_returns/std_returns)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)



New best average return found at -70.37957000732422! Saving checkpoint at iteration 0

New best average return found at 22.008630752563477! Saving checkpoint at iteration 10

Iteration: 10
Train Loss: 8.902644157409668
Average Return: 22.008630752563477

Iteration: 20
Train Loss: 103.37938690185547
Average Return: 21.374168395996094

New best average return found at 25.21816062927246! Saving checkpoint at iteration 30

Iteration: 30
Train Loss: 28.025283813476562
Average Return: 25.21816062927246

New best average return found at 28.651527404785156! Saving checkpoint at iteration 40

Iteration: 40
Train Loss: 490.78509521484375
Average Return: 28.651527404785156

New best average return found at 34.956356048583984! Saving checkpoint at iteration 50

Iteration: 50
Train Loss: 22.34660530090332
Average Return: 34.956356048583984

Iteration: 60
Train Loss: 35.16065979003906
Average Return: 28.90963363647461

Iteration: 70
Train Loss: 207.6363983154297
Average Return: 33.41512680053711

N

  return np.exp(average_returns/std_downfall_returns)



Iteration: 170
Train Loss: 20.440929412841797
Average Return: 27.96502685546875

Iteration: 180
Train Loss: 118.69840240478516
Average Return: 29.34032440185547

Iteration: 190
Train Loss: 9.380199432373047
Average Return: 28.560409545898438

Iteration: 200
Train Loss: 48.52779769897461
Average Return: 30.519847869873047

Iteration: 210
Train Loss: 261.7218933105469
Average Return: 26.530473709106445

Iteration: 220
Train Loss: 3.2605135440826416
Average Return: 21.467056274414062

Iteration: 230
Train Loss: 168.5249786376953
Average Return: 25.69187355041504

Iteration: 240
Train Loss: 71.05810546875
Average Return: 20.825489044189453

Iteration: 250
Train Loss: 9.380815505981445
Average Return: 30.432716369628906

Iteration: 260
Train Loss: 46.635955810546875
Average Return: 29.86541748046875

Iteration: 270
Train Loss: 30.219234466552734
Average Return: 30.262235641479492

Iteration: 280
Train Loss: 27.41463851928711
Average Return: 30.821786880493164

Iteration: 290
Train Loss: 48

  return np.exp(average_returns/std_returns)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)



New best average return found at -43.482452392578125! Saving checkpoint at iteration 0

New best average return found at 6.350119590759277! Saving checkpoint at iteration 10

Iteration: 10
Train Loss: 5.717967510223389
Average Return: 6.350119590759277

Iteration: 20
Train Loss: 17.89725685119629
Average Return: -0.9094057083129883

Iteration: 30
Train Loss: 5.808187961578369
Average Return: 2.1024227142333984

Iteration: 40
Train Loss: 12.159335136413574
Average Return: 3.8655874729156494

New best average return found at 8.949485778808594! Saving checkpoint at iteration 50

Iteration: 50
Train Loss: 31.96603012084961
Average Return: 8.949485778808594

New best average return found at 11.190781593322754! Saving checkpoint at iteration 60

Iteration: 60
Train Loss: 87.64916229248047
Average Return: 11.190781593322754

New best average return found at 12.906606674194336! Saving checkpoint at iteration 70

Iteration: 70
Train Loss: 8.28457260131836
Average Return: 12.906606674194336

Ne

  return np.exp(average_returns/std_returns)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)



New best average return found at -58.91674041748047! Saving checkpoint at iteration 0

New best average return found at 26.361770629882812! Saving checkpoint at iteration 10

Iteration: 10
Train Loss: 6.428366184234619
Average Return: 26.361770629882812

Iteration: 20
Train Loss: 40.21513748168945
Average Return: 16.489662170410156

Iteration: 30
Train Loss: 6.854959487915039
Average Return: 15.390291213989258

Iteration: 40
Train Loss: 28.67376136779785
Average Return: 24.604101181030273

Iteration: 50
Train Loss: 27.152767181396484
Average Return: 24.73705291748047

Iteration: 60
Train Loss: 1053.169921875
Average Return: 26.084291458129883

Iteration: 70
Train Loss: 55.041316986083984
Average Return: 20.81643295288086

Iteration: 80
Train Loss: 22.07404136657715
Average Return: 20.322132110595703

Iteration: 90
Train Loss: 9.879684448242188
Average Return: 20.790447235107422

Iteration: 100
Train Loss: 44.56101608276367
Average Return: 19.316007614135742

Iteration: 110
Train Loss:

  return np.exp(average_returns/std_downfall_returns)



Iteration: 730
Train Loss: 30.38315773010254
Average Return: 20.944732666015625

Iteration: 740
Train Loss: 26.6074275970459
Average Return: 21.806758880615234

Iteration: 750
Train Loss: 45.98372268676758
Average Return: 25.34726905822754

Iteration: 760
Train Loss: 12.464831352233887
Average Return: 24.05818748474121

Iteration: 770
Train Loss: 23.500608444213867
Average Return: 19.314208984375

Iteration: 780
Train Loss: 14.037674903869629
Average Return: 20.93787956237793

Iteration: 790
Train Loss: 14.23483943939209
Average Return: 24.04638671875

Iteration: 800
Train Loss: 54.20442199707031
Average Return: 24.16851806640625

Iteration: 810
Train Loss: 16.710580825805664
Average Return: 23.73822784423828

Iteration: 820
Train Loss: 39.46760940551758
Average Return: 21.864519119262695

Iteration: 830
Train Loss: 57.25161361694336
Average Return: 25.275304794311523

Iteration: 840
Train Loss: 8.103677749633789
Average Return: 23.893707275390625

Iteration: 850
Train Loss: 28.092546