In [1]:
import sys
from baselines import logger, bench
from baselines.common import set_global_seeds
from baselines.common.cmd_util import make_atari_env, atari_arg_parser
from baselines.common.atari_wrappers import wrap_deepmind, make_atari
from baselines.common.vec_env.vec_frame_stack import VecFrameStack
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.ppo2 import ppo2
from baselines.ppo2.policies import CnnPolicy, LstmPolicy, LnLstmPolicy, MlpPolicy
import multiprocessing
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
def train(env_id, num_timesteps, seed, policy):
    """Train a PPO2 agent on a single (non-vectorized) Atari environment.

    Args:
        env_id: Gym environment id handed to ``make_atari``
            (e.g. ``'BreakoutNoFrameskip-v4'``).
        num_timesteps: Nominal timestep budget. ``ppo2.learn`` is given
            ``int(num_timesteps * 1.1)`` total timesteps.
        seed: Seed passed to ``set_global_seeds`` and to the environment.
        policy: Policy name, one of ``'cnn'``, ``'lstm'``, ``'lnlstm'``
            or ``'mlp'``.

    Raises:
        KeyError: If ``policy`` is not one of the supported names.
    """
    # Resolve the policy first so an unknown name fails before a TF session
    # or an emulator has been created.
    policy_fn = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy, 'mlp': MlpPolicy}[policy]

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    # The session is entered and deliberately left open: it stays the default
    # session after this function returns (it is never exited here).
    tf.Session(config=config).__enter__()

    def make_env():
        env = make_atari(env_id)
        # Seed the environment itself; set_global_seeds() below is only
        # given the seed value, not the env object.
        env.seed(seed)
        env = bench.Monitor(env, logger.get_dir(), allow_early_resets=True)
        return wrap_deepmind(env)

    set_global_seeds(seed)

    env = DummyVecEnv([make_env])
    # Alternative setup kept for reference (8 envs, 4 stacked frames):
    #   env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)

    try:
        ppo2.learn(policy=policy_fn, env=env, nsteps=128, nminibatches=4,
            lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
            ent_coef=.01,
            # Learning rate and clip range are callables of `f`, scaling the
            # base values 2.5e-4 and 0.1.
            # NOTE(review): presumably `f` is the remaining-progress fraction
            # annealed from 1 to 0 by ppo2 -- confirm against ppo2.learn.
            lr=lambda f : f * 2.5e-4,
            cliprange=lambda f : f * 0.1,
            total_timesteps=int(num_timesteps * 1.1))
    finally:
        # Release the environment even when training is interrupted
        # (e.g. KeyboardInterrupt from the notebook's stop button).
        env.close()

In [3]:
# Set up the baselines logger with its default configuration, then launch
# PPO2 training on Breakout with the CNN policy.
logger.configure()

train(
    env_id='BreakoutNoFrameskip-v4',
    num_timesteps=1e6,
    seed=0,
    policy='cnn',
)

Logging to /tmp/openai-2018-05-29-14-05-05-497996
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
--------------------------------------
| approxkl           | 0.003608401   |
| clipfrac           | 0.22851562    |
| eplenmean          | nan           |
| eprewmean          | nan           |
| explained_variance | 0.0315        |
| fps                | 83            |
| nupdates           | 1             |
| policy_entropy     | 1.3826978     |
| policy_loss        | -0.0109578855 |
| serial_timesteps   | 128           |
| time_elapsed       | 1.54          |
| total_timesteps    | 128           |
| value_loss         | 0.052479424   |
--------------------------------------
Saving to /tmp/openai-2018-

--------------------------------------
| approxkl           | 0.0048580747  |
| clipfrac           | 0.28320312    |
| eplenmean          | 161           |
| eprewmean          | 0.75          |
| explained_variance | -0.228        |
| fps                | 204           |
| nupdates           | 14            |
| policy_entropy     | 1.2966607     |
| policy_loss        | -0.0044763307 |
| serial_timesteps   | 1792          |
| time_elapsed       | 9.39          |
| total_timesteps    | 1792          |
| value_loss         | 0.0008044718  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0005203548  |
| clipfrac           | 0.0234375     |
| eplenmean          | 161           |
| eprewmean          | 0.75          |
| explained_variance | 0.123         |
| fps                | 216           |
| nupdates           | 15            |
| policy_entropy     | 1.3094387     |
| policy_loss        | 0.00069902395 |
| serial_timesteps   | 19

-------------------------------------
| approxkl           | 0.0029675744 |
| clipfrac           | 0.22070312   |
| eplenmean          | 204          |
| eprewmean          | 1.89         |
| explained_variance | 0.37         |
| fps                | 228          |
| nupdates           | 29           |
| policy_entropy     | 1.2533236    |
| policy_loss        | -0.007771855 |
| serial_timesteps   | 3712         |
| time_elapsed       | 18.1         |
| total_timesteps    | 3712         |
| value_loss         | 0.036418125  |
-------------------------------------
--------------------------------------
| approxkl           | 0.00089759147 |
| clipfrac           | 0.041015625   |
| eplenmean          | 204           |
| eprewmean          | 1.89          |
| explained_variance | 0.356         |
| fps                | 226           |
| nupdates           | 30            |
| policy_entropy     | 1.2779698     |
| policy_loss        | -0.003119274  |
| serial_timesteps   | 3840          |
|

KeyboardInterrupt: 

In [6]:
load_path = '../data/processed/atari-00950'
!head $load_path

�]q (cjoblib.numpy_pickle
NumpyArrayWrapper
q)�q}q(X   subclassqcnumpy
ndarray
qX   shapeq(KKKK tqX   orderqX   Cq	X   dtypeq
cnumpy
dtype
qX   f4qK K�qRq(KX   <qNNNJ����J����K tqbX
   allow_mmapq�ub������<*)�h�>�)��w=�ʓ��<"��ٽќ��J�=�J>c˽9;�=�����=�;�:;=���>��(��p���<�
?�-�(��޴=��f���<4�^=���=��J<8p]��,ܽ��ؽ�=zM���ܽ��H>0�g�uQ6��&�m�ʽ7N����f=��j��bi���;� H�=T��=�.=t�Ƚ�����)�$�f<@�
