In [None]:
from stable_baselines.common.policies import MlpLnLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2

import sys
import os

sys.path.append(os.path.dirname(os.path.abspath('')))

from tensortrade.environments import TradingEnvironment
from tensortrade.actions.discrete import SimpleDiscreteStrategy
from tensortrade.rewards.simple import IncrementalProfitStrategy
from tensortrade.exchanges.simulated import GeneratedExchange

def _make_trading_env():
    """Build one TradingEnvironment from the strategies and exchange imported above."""
    return TradingEnvironment(
        action_strategy=SimpleDiscreteStrategy(),
        reward_strategy=IncrementalProfitStrategy(),
        exchange=GeneratedExchange(),
    )


# DummyVecEnv takes a list of environment factories; a single factory is
# supplied here, so the vectorized env wraps exactly one TradingEnvironment.
env = DummyVecEnv([_make_trading_env])

# PPO2 with a recurrent (layer-normalised LSTM) MLP policy.
# NOTE(review): nminibatches=1 is presumably needed because recurrent policies
# require the number of environments to be a multiple of nminibatches -- confirm.
agent = PPO2(MlpLnLstmPolicy, env, verbose=1, nminibatches=1)

# Train the agent for 1000 timesteps.
agent.learn(total_timesteps=1000)

# Roll the trained agent forward for 1000 steps and report the results
# exposed by the exchange object found in the step info.
obs = env.reset()

# state=None makes `predict` start from the policy's initial recurrent state.
state = None
# With a vectorized env, `done` holds one flag per wrapped environment.
done = [False for _ in range(env.num_envs)]
exchange = None

for _ in range(1000):
    # Recurrent policies need both the previous state and the done mask so the
    # LSTM state is reset when an episode ends (the vectorized env resets
    # finished episodes automatically); without the mask, stale memory from the
    # previous episode leaks into the next one.
    action, state = agent.predict(obs, state=state, mask=done)
    obs, reward, done, info = env.step(action)

    print('Info: ', info)

    # `info` is indexed per environment; the first entry's 'exchange' value is
    # kept so it can be inspected once the loop has finished.
    exchange = info[0]['exchange']

print('Trades: ', exchange.trades())
print('Portfolio: ', exchange.portfolio())
# NOTE(review): accessed as an attribute, unlike trades()/portfolio() above --
# confirm profit_loss_percent is a property and not a method.
print('P/L: ', exchange.profit_loss_percent)


--------------------------------------
| approxkl           | 0.022459334   |
| clipfrac           | 0.2734375     |
| explained_variance | -5.32         |
| fps                | 5             |
| nupdates           | 1             |
| policy_entropy     | 3.1121836     |
| policy_loss        | -0.0133532975 |
| serial_timesteps   | 128           |
| time_elapsed       | 0             |
| total_timesteps    | 4             |
| value_loss         | 0.023782305   |
--------------------------------------
-------------------------------------
| approxkl           | 0.006769754  |
| clipfrac           | 0.095703125  |
| explained_variance | -6.02        |
| fps                | 38           |
| nupdates           | 2            |
| policy_entropy     | 3.1208048    |
| policy_loss        | -0.033199027 |
| serial_timesteps   | 256          |
| time_elapsed       | 25.4         |
| total_timesteps    | 8            |
| value_loss         | 0.027380819  |
-------------------------------------