In [1]:
from stable_baselines.common.policies import MlpLnLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2

import sys
import os

# Put the parent of the current working directory on the import path
# (abspath('') resolves to the cwd). Presumably this lets the `tensortrade`
# imports below resolve from a source checkout -- confirm against repo layout.
sys.path.append(os.path.dirname(os.path.abspath('')))

from tensortrade.environments import TradingEnvironment
from tensortrade.environments.actions.discrete import SimpleDiscreteStrategy
from tensortrade.environments.rewards.simple import IncrementalProfitStrategy
from tensortrade.exchanges.simulated import GeneratedExchange

def _make_trading_env():
    """Build a fresh TradingEnvironment backed by a simulated exchange."""
    return TradingEnvironment(
        action_strategy=SimpleDiscreteStrategy(),
        reward_strategy=IncrementalProfitStrategy(),
        exchange=GeneratedExchange(),
    )


# DummyVecEnv takes a list of zero-argument factories; a single factory
# means a single wrapped environment.
env = DummyVecEnv([_make_trading_env])

# Recurrent (LSTM) policy. With PPO2 and a recurrent policy the number of
# environments must be a multiple of `nminibatches`, hence 1 for one env.
agent = PPO2(MlpLnLstmPolicy, env, verbose=1, nminibatches=1)

# Train for 7500 timesteps; verbose=1 prints the logger tables during training.
agent.learn(7500)

# Roll the trained agent forward for 2500 steps and report what it did on the
# exchange.
obs = env.reset()

# `state=None` tells `predict` to start from the policy's initial LSTM state.
state = None
# With a VecEnv, `done` is a per-environment vector; nothing has finished yet.
done = [False for _ in range(env.num_envs)]
exchange = None

for _ in range(2500):
    # A recurrent policy needs both the previous state and the done mask so
    # that its LSTM state is reset whenever a new episode begins.
    action, state = agent.predict(obs, state=state, mask=done)
    obs, reward, done, info = env.step(action)

    # `info` holds one dict per wrapped environment; index 0 is the only
    # environment wrapped by `env` here.
    exchange = info[0]['exchange']

print('Trades: ', exchange.trades())
print('Portfolio: ', exchange.portfolio())
print('P/L: ', exchange.profit_loss_percent())


-------------------------------------
| approxkl           | 0.022325728  |
| clipfrac           | 0.2265625    |
| explained_variance | -6.03        |
| fps                | 4            |
| nupdates           | 1            |
| policy_entropy     | 3.1261897    |
| policy_loss        | -0.024499245 |
| serial_timesteps   | 128          |
| time_elapsed       | 0            |
| total_timesteps    | 4            |
| value_loss         | 0.022471774  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0070868125 |
| clipfrac           | 0.095703125  |
| explained_variance | -11.6        |
| fps                | 43           |
| nupdates           | 2            |
| policy_entropy     | 3.1229749    |
| policy_loss        | -0.03916142  |
| serial_timesteps   | 256          |
| time_elapsed       | 28           |
| total_timesteps    | 8            |
| value_loss         | 0.014777485  |
-------------------------------------
------------

-------------------------------------
| approxkl           | 0.0028300483 |
| clipfrac           | 0.03125      |
| explained_variance | -8.37        |
| fps                | 41           |
| nupdates           | 18           |
| policy_entropy     | 2.9800687    |
| policy_loss        | -0.02539976  |
| serial_timesteps   | 2304         |
| time_elapsed       | 76.6         |
| total_timesteps    | 72           |
| value_loss         | 0.0031218277 |
-------------------------------------
-------------------------------------
| approxkl           | 0.0038239816 |
| clipfrac           | 0.056640625  |
| explained_variance | -45.2        |
| fps                | 41           |
| nupdates           | 19           |
| policy_entropy     | 2.971603     |
| policy_loss        | -0.027134504 |
| serial_timesteps   | 2432         |
| time_elapsed       | 79.6         |
| total_timesteps    | 76           |
| value_loss         | 0.0032402324 |
-------------------------------------
------------

-------------------------------------
| approxkl           | 0.006152682  |
| clipfrac           | 0.0859375    |
| explained_variance | -1.91        |
| fps                | 44           |
| nupdates           | 35           |
| policy_entropy     | 2.969111     |
| policy_loss        | -0.024361473 |
| serial_timesteps   | 4480         |
| time_elapsed       | 129          |
| total_timesteps    | 140          |
| value_loss         | 0.0016564863 |
-------------------------------------
-------------------------------------
| approxkl           | 0.0028458503 |
| clipfrac           | 0.041015625  |
| explained_variance | -3.46        |
| fps                | 43           |
| nupdates           | 36           |
| policy_entropy     | 2.9434595    |
| policy_loss        | -0.011109933 |
| serial_timesteps   | 4608         |
| time_elapsed       | 131          |
| total_timesteps    | 144          |
| value_loss         | 0.0004977039 |
-------------------------------------
------------

-------------------------------------
| approxkl           | 0.0031977105 |
| clipfrac           | 0.03125      |
| explained_variance | -19.5        |
| fps                | 44           |
| nupdates           | 52           |
| policy_entropy     | 2.8352118    |
| policy_loss        | -0.021639515 |
| serial_timesteps   | 6656         |
| time_elapsed       | 179          |
| total_timesteps    | 208          |
| value_loss         | 0.0006587289 |
-------------------------------------
-------------------------------------
| approxkl           | 0.0026631549 |
| clipfrac           | 0.02734375   |
| explained_variance | -10.5        |
| fps                | 41           |
| nupdates           | 53           |
| policy_entropy     | 2.8683934    |
| policy_loss        | -0.013597782 |
| serial_timesteps   | 6784         |
| time_elapsed       | 182          |
| total_timesteps    | 212          |
| value_loss         | 0.0011726245 |
-------------------------------------
------------