In [4]:
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import numpy as np

from tensorforce.agents import PPOAgent
from tensorforce.execution import Runner
from env.gymWrapper import create_btc_env

import os

# Checkpoint locations, resolved against the current working directory at
# the moment this module is executed.
# NOTE(review): the original note here was only "issue 287" -- presumably an
# upstream tracker issue about model save/restore paths; confirm and link it.
LOAD_DIR = os.path.join(os.getcwd(), "model")
# Path handed to agent.save_model() by the episode_finished callback.
SAVE_DIR = os.path.join(LOAD_DIR, "ppo_agent")


# Callback function printing episode statistics
def episode_finished(r):
    """Report the episode that just ended and checkpoint the agent if it paid off.

    Args:
        r: Runner-like object exposing ``episode``, ``episode_timestep``,
            ``episode_rewards`` and ``agent``.

    Returns:
        Always ``True`` (presumably telling the runner to keep going --
        confirm against the Runner callback contract).
    """
    last_reward = r.episode_rewards[-1]
    template = "Finished episode {ep} after {ts} timesteps (reward: {reward})"
    print(template.format(
        ep=r.episode,
        ts=r.episode_timestep,
        reward="%.6f" % last_reward,
    ))

    # Only episodes with a strictly positive final reward overwrite the checkpoint.
    should_checkpoint = np.mean(last_reward) > 0
    if should_checkpoint:
        r.agent.save_model(SAVE_DIR, append_timestep=False)

    return True

def print_simple_log(r):
    """Print a one-line summary of the most recent episode.

    Reads ``episode``, ``episode_timestep`` and the last entry of
    ``episode_rewards`` from ``r``; the reward is printed without any
    extra number formatting. Returns ``None``.
    """
    template = "Finished episode {ep} after {ts} timesteps (reward: {reward})"
    summary = template.format(
        ep=r.episode,
        ts=r.episode_timestep,
        reward=r.episode_rewards[-1],
    )
    print(summary)

def create_network_spec():
    """Build the layer list that main() passes to the agent as ``network``.

    Returns:
        A new list of layer dicts: a flatten layer, two 32-unit dense
        layers with ReLU activation, and a 32-unit ``internal_lstm`` layer.
    """
    hidden_units = 32

    layers = [dict(type="flatten")]
    layers.extend(
        dict(type="dense", size=hidden_units, activation="relu")
        for _ in range(2)
    )
    layers.append(dict(type="internal_lstm", size=hidden_units))
    return layers

def create_baseline_spec():
    """Build the layer list that main() passes as the baseline's ``network``.

    Returns:
        A new list of layer dicts: a 32-unit ``lstm`` layer followed by two
        32-unit dense layers with ReLU activation.
    """
    units = 32

    recurrent_layer = {"type": "lstm", "size": units}
    dense_layers = [
        {"type": "dense", "size": units, "activation": "relu"}
        for _ in range(2)
    ]
    return [recurrent_layer] + dense_layers

def main():
    """Train a PPO agent on the training data, then run one test episode.

    Steps, in order:
      1. Build a training and a test environment with ``create_btc_env``
         (window size 30, reading from ./data/train/ and ./data/test/).
      2. Construct a ``PPOAgent`` from the training environment's state and
         action specs, using ``create_network_spec`` for the policy network
         and ``create_baseline_spec`` for the baseline network.
      3. Run 10 training episodes (at most 16000 timesteps each) with
         ``episode_finished`` as the per-episode callback, then print the
         mean of the last (up to) 100 episode rewards.
      4. Run a single deterministic test episode with the same agent,
         logging it through ``print_simple_log``.
    """

    # create environment for train and test
    PATH_TRAIN = "./data/train/"
    PATH_TEST = "./data/test/"
    TIMESTEP = 30 # window size
    environment = create_btc_env(window_size=TIMESTEP, path=PATH_TRAIN, train=True)
    test_environment = create_btc_env(window_size=TIMESTEP, path=PATH_TEST, train=False)

    network_spec = create_network_spec()
    baseline_spec = create_baseline_spec()

    agent = PPOAgent(
        discount=0.9999,
        states=environment.states,
        actions=environment.actions,
        network=network_spec,
        # Agent
        states_preprocessing=None,
        actions_exploration=None,
        reward_preprocessing=None,
        # MemoryModel
        # NOTE(review): the unit is 'timesteps', so batch_size and frequency
        # below are presumably timestep counts (the earlier comments spoke of
        # "10 episodes", which did not match the values) -- confirm.
        update_mode=dict(
            unit='timesteps',  # alternative: 'episodes'
            batch_size=32,
            frequency=10
        ),
        memory=dict(
            type='latest',
            include_next_states=False,
            capacity=50000
        ),
        # DistributionModel
        distributions=None,
        entropy_regularization=0.0,  # None
        # PGModel

        baseline_mode='states',
        baseline=dict(type='custom', network=baseline_spec),
        baseline_optimizer=dict(
            type='multi_step',
            optimizer=dict(
                type='adam',
                learning_rate=(1e-4)  # 3e-4
            ),
            num_steps=5
        ),
        gae_lambda=0,  # 0
        # PGLRModel
        likelihood_ratio_clipping=0.2,
        # PPOAgent
        step_optimizer=dict(
            type='adam',
            learning_rate=(1e-4)  # 1e-4
        ),
        subsampling_fraction=0.2,  # 0.1
        optimization_steps=10,
        execution=dict(
            type='single',
            session_config=None,
            distributed_spec=None
        )
    )

    # Both runners share the same agent, so the test run evaluates whatever
    # the training run has learned.
    train_runner = Runner(agent=agent, environment=environment)
    test_runner = Runner(
        agent=agent,
        environment=test_environment,
    )

    # Use the same `num_episodes` keyword as the test run below, so both
    # Runner.run calls agree on how the episode count is passed.
    train_runner.run(num_episodes=10, max_episode_timesteps=16000, episode_finished=episode_finished)
    print("Learning finished. Total episodes: {ep}. Average reward of last 100 episodes: {ar}.".format(
        ep=train_runner.episode,
        ar=np.mean(train_runner.episode_rewards[-100:]))
    )

    test_runner.run(num_episodes=1, deterministic=True, testing=True, episode_finished=print_simple_log)

# Run the train-then-test pipeline only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
start episode ... XBTUSD_5m_16000_train.csv at 1824
Tick: 1900/ Portfolio (krw-won): 1009200.5942289841
Long: 7/ Short: 4
Tick: 2000/ Portfolio (krw-won): 942797.2384221172
Long: 21/ Short: 19
Tick: 2100/ Portfolio (krw-won): 901638.4060202254
Long: 29/ Short: 30
Tick: 2200/ Portfolio (krw-won): 896364.0211088819
Long: 36/ Short: 47
Tick: 2300/ Portfolio (krw-won): 890475.98799866
Long: 51/ Short: 56
Tick: 2400/ Portfolio (krw-won): 864154.3833539666
Long: 61/ Short: 66
Tick: 2500/ Portfolio (krw-won): 851733.6878620163
Long: 73/ Short: 76
Tick: 2600/ Portfolio (krw-won): 859606.3895028098
Long: 88/ Short: 83
Tick: 2700/ Portfolio (krw-won): 836891.8138522933
Long: 104/ Short: 91
Tick: 2800/ Portfolio (krw-won): 797768.2285701907
Long: 119/ Short: 101
Tick: 2900/ Portfolio (krw-won): 889974.411081231
Long: 129/ Short: 109
Tick: 3000/ Portfolio (krw-won): 815234.392247

Tick: 12800/ Portfolio (krw-won): 80427.61306667383
Long: 1259/ Short: 1216
Tick: 12900/ Portfolio (krw-won): 78392.85576567492
Long: 1268/ Short: 1226
Tick: 13000/ Portfolio (krw-won): 74964.66962460203
Long: 1275/ Short: 1244
Tick: 13100/ Portfolio (krw-won): 74316.91736437094
Long: 1287/ Short: 1257
Tick: 13200/ Portfolio (krw-won): 71711.36801921422
Long: 1296/ Short: 1265
Tick: 13300/ Portfolio (krw-won): 67052.10076904153
Long: 1303/ Short: 1279
Tick: 13400/ Portfolio (krw-won): 64075.64958614296
Long: 1320/ Short: 1291
Tick: 13500/ Portfolio (krw-won): 63791.550479820995
Long: 1337/ Short: 1299
Tick: 13600/ Portfolio (krw-won): 64205.87561526305
Long: 1349/ Short: 1308
Tick: 13700/ Portfolio (krw-won): 63240.712033318756
Long: 1362/ Short: 1317
Tick: 13800/ Portfolio (krw-won): 63711.12652780594
Long: 1380/ Short: 1327
Tick: 13900/ Portfolio (krw-won): 63160.90226681517
Long: 1392/ Short: 1343
Tick: 14000/ Portfolio (krw-won): 65528.90337168231
Long: 1403/ Short: 1356
Tick: 1410

Tick: 7100/ Portfolio (krw-won): 577775.3307622101
Long: 210/ Short: 211
Tick: 7200/ Portfolio (krw-won): 565458.1224454694
Long: 218/ Short: 225
Tick: 7300/ Portfolio (krw-won): 532045.697898439
Long: 229/ Short: 240
Tick: 7400/ Portfolio (krw-won): 515387.6348914985
Long: 237/ Short: 250
Tick: 7500/ Portfolio (krw-won): 497189.51524370187
Long: 246/ Short: 266
Tick: 7600/ Portfolio (krw-won): 485152.9096446471
Long: 259/ Short: 274
Tick: 7700/ Portfolio (krw-won): 445891.35961973324
Long: 270/ Short: 289
Tick: 7800/ Portfolio (krw-won): 436896.7306308978
Long: 276/ Short: 301
Tick: 7900/ Portfolio (krw-won): 422875.0233946299
Long: 290/ Short: 312
Tick: 8000/ Portfolio (krw-won): 421007.44153908046
Long: 298/ Short: 323
Tick: 8100/ Portfolio (krw-won): 389007.82397704973
Long: 313/ Short: 331
Tick: 8200/ Portfolio (krw-won): 371741.02175130544
Long: 328/ Short: 341
Tick: 8300/ Portfolio (krw-won): 365789.90572705795
Long: 339/ Short: 354
Tick: 8400/ Portfolio (krw-won): 354727.999244

Tick: 11900/ Portfolio (krw-won): 822007.7071348509
Long: 227/ Short: 228
Tick: 12000/ Portfolio (krw-won): 812834.020803155
Long: 239/ Short: 240
Tick: 12100/ Portfolio (krw-won): 795374.5090136507
Long: 257/ Short: 249
Tick: 12200/ Portfolio (krw-won): 798539.0186267949
Long: 275/ Short: 254
Tick: 12300/ Portfolio (krw-won): 817149.6501799708
Long: 287/ Short: 266
Tick: 12400/ Portfolio (krw-won): 808556.0205426363
Long: 304/ Short: 273
Tick: 12500/ Portfolio (krw-won): 791138.7820459818
Long: 314/ Short: 283
Tick: 12600/ Portfolio (krw-won): 765751.6484162703
Long: 329/ Short: 289
Tick: 12700/ Portfolio (krw-won): 744475.1322151537
Long: 338/ Short: 305
Tick: 12800/ Portfolio (krw-won): 686502.6035241925
Long: 347/ Short: 319
Tick: 12900/ Portfolio (krw-won): 630986.0508435024
Long: 358/ Short: 327
Tick: 13000/ Portfolio (krw-won): 620958.8520543936
Long: 371/ Short: 342
Tick: 13100/ Portfolio (krw-won): 594978.603356857
Long: 378/ Short: 359
Tick: 13200/ Portfolio (krw-won): 615390

Tick: 9600/ Portfolio (krw-won): 134653.8304559682
Long: 826/ Short: 743
Tick: 9700/ Portfolio (krw-won): 133249.87295525376
Long: 839/ Short: 755
Tick: 9800/ Portfolio (krw-won): 133260.9339397446
Long: 850/ Short: 767
Tick: 9900/ Portfolio (krw-won): 124282.93097596288
Long: 863/ Short: 782
Tick: 10000/ Portfolio (krw-won): 120521.982160036
Long: 875/ Short: 792
Tick: 10100/ Portfolio (krw-won): 119481.15832681925
Long: 884/ Short: 808
Tick: 10200/ Portfolio (krw-won): 123121.9478731612
Long: 894/ Short: 819
Tick: 10300/ Portfolio (krw-won): 122830.19699812027
Long: 906/ Short: 828
Tick: 10400/ Portfolio (krw-won): 120639.73580772382
Long: 920/ Short: 839
Tick: 10500/ Portfolio (krw-won): 116183.75946499626
Long: 931/ Short: 849
Tick: 10600/ Portfolio (krw-won): 113999.12296453367
Long: 940/ Short: 859
Tick: 10700/ Portfolio (krw-won): 117037.80732779765
Long: 948/ Short: 870
Tick: 10800/ Portfolio (krw-won): 120022.90911107864
Long: 960/ Short: 883
Tick: 10900/ Portfolio (krw-won): 

Tick: 15600/ Portfolio (krw-won): 436506.4890364667
Long: 488/ Short: 529
Tick: 15700/ Portfolio (krw-won): 407406.5880690667
Long: 499/ Short: 541
Tick: 15800/ Portfolio (krw-won): 370533.28274323884
Long: 511/ Short: 551
Tick: 15900/ Portfolio (krw-won): 333530.80543636985
Long: 524/ Short: 559
Finished episode 6 after 4926 timesteps (reward: -1.068464)
start episode ... XBTUSD_5m_16000_train.csv at 10210
Tick: 10300/ Portfolio (krw-won): 938921.4686236379
Long: 8/ Short: 7
Tick: 10400/ Portfolio (krw-won): 894008.3476368375
Long: 17/ Short: 19
Tick: 10500/ Portfolio (krw-won): 884041.3777761725
Long: 30/ Short: 27
Tick: 10600/ Portfolio (krw-won): 842928.9421827303
Long: 39/ Short: 39
Tick: 10700/ Portfolio (krw-won): 844973.2442554536
Long: 49/ Short: 51
Tick: 10800/ Portfolio (krw-won): 872401.2080705167
Long: 62/ Short: 60
Tick: 10900/ Portfolio (krw-won): 933707.3935042599
Long: 76/ Short: 70
Tick: 11000/ Portfolio (krw-won): 880926.6694778298
Long: 88/ Short: 80
Tick: 11100/ Po

Tick: 14800/ Portfolio (krw-won): 754784.6126252332
Long: 171/ Short: 148
Tick: 14900/ Portfolio (krw-won): 786874.7557237013
Long: 177/ Short: 161
Tick: 15000/ Portfolio (krw-won): 760962.8497693994
Long: 184/ Short: 172
Tick: 15100/ Portfolio (krw-won): 710397.6542323434
Long: 199/ Short: 184
Tick: 15200/ Portfolio (krw-won): 673148.8838708545
Long: 212/ Short: 196
Tick: 15300/ Portfolio (krw-won): 634358.8388076103
Long: 225/ Short: 208
Tick: 15400/ Portfolio (krw-won): 600866.4542858782
Long: 239/ Short: 216
Tick: 15500/ Portfolio (krw-won): 578612.4395379465
Long: 253/ Short: 228
Tick: 15600/ Portfolio (krw-won): 592698.3359940582
Long: 263/ Short: 238
Tick: 15700/ Portfolio (krw-won): 583736.7431568186
Long: 271/ Short: 253
Tick: 15800/ Portfolio (krw-won): 581620.7561997176
Long: 283/ Short: 266
Tick: 15900/ Portfolio (krw-won): 584396.3513887342
Long: 300/ Short: 277
Finished episode 9 after 2562 timesteps (reward: -0.489178)
start episode ... XBTUSD_5m_16000_train.csv at 2757


Tick: 12600/ Portfolio (krw-won): 56636.69521843126
Long: 1141/ Short: 1139
Tick: 12700/ Portfolio (krw-won): 51295.60362562225
Long: 1156/ Short: 1146
Tick: 12800/ Portfolio (krw-won): 51595.84750782279
Long: 1168/ Short: 1156
Tick: 12900/ Portfolio (krw-won): 49711.716999266006
Long: 1183/ Short: 1165
Tick: 13000/ Portfolio (krw-won): 45865.78643647806
Long: 1193/ Short: 1179
Tick: 13100/ Portfolio (krw-won): 46880.93364007226
Long: 1209/ Short: 1186
Tick: 13200/ Portfolio (krw-won): 46200.65218198893
Long: 1222/ Short: 1198
Tick: 13300/ Portfolio (krw-won): 43359.07308939782
Long: 1231/ Short: 1212
Tick: 13400/ Portfolio (krw-won): 42868.06789588167
Long: 1238/ Short: 1223
Tick: 13500/ Portfolio (krw-won): 41312.12181541189
Long: 1248/ Short: 1238
Tick: 13600/ Portfolio (krw-won): 39985.65795237994
Long: 1262/ Short: 1247
Tick: 13700/ Portfolio (krw-won): 42251.05324536955
Long: 1276/ Short: 1259
Tick: 13800/ Portfolio (krw-won): 42587.23232307738
Long: 1284/ Short: 1270
Tick: 13900

Tick: 7900/ Portfolio (krw-won): 1170042.5708313915
Long: 1/ Short: 2
Tick: 8000/ Portfolio (krw-won): 1194371.909244605
Long: 1/ Short: 2
Tick: 8100/ Portfolio (krw-won): 1210118.594748201
Long: 1/ Short: 2
Tick: 8200/ Portfolio (krw-won): 1185966.5838744426
Long: 1/ Short: 2
Tick: 8300/ Portfolio (krw-won): 1135534.6316534656
Long: 1/ Short: 2
Tick: 8400/ Portfolio (krw-won): 1130817.7191039652
Long: 1/ Short: 2
Tick: 8500/ Portfolio (krw-won): 1097231.883131205
Long: 1/ Short: 2
Tick: 8600/ Portfolio (krw-won): 1066696.0808370695
Long: 1/ Short: 2
Tick: 8700/ Portfolio (krw-won): 1078115.9743779658
Long: 1/ Short: 2
Tick: 8800/ Portfolio (krw-won): 1054886.0667093724
Long: 1/ Short: 2
Tick: 8900/ Portfolio (krw-won): 1048927.8613836875
Long: 1/ Short: 2
Tick: 9000/ Portfolio (krw-won): 1019988.0069446459
Long: 1/ Short: 2
Tick: 9100/ Portfolio (krw-won): 989523.1356662924
Long: 1/ Short: 2
Tick: 9200/ Portfolio (krw-won): 1033677.6929905651
Long: 1/ Short: 2
Tick: 9300/ Portfolio (k