In [1]:
#!/usr/bin/env python3
import numpy as np
import gym
import os
import multiprocessing
from pathlib import Path

import matplotlib.pyplot as plt
import time
from IPython import display
%matplotlib notebook

from stable_baselines.common.cmd_util import mujoco_arg_parser
from stable_baselines import bench, logger
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env.vec_normalize import VecNormalize
from stable_baselines.ppo2 import PPO2
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.vec_env import VecFrameStack
from stable_baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from stable_baselines.results_plotter import load_results, ts2xy


def train(env_id, num_timesteps, seed):
    """
    Train PPO2 model for Mujoco environment, for testing purposes
    :param env_id: (str) the environment id string
    :param num_timesteps: (int) the number of timesteps to run
    :param seed: (int) Used to seed the random generator.
    """

    n_cpu = multiprocessing.cpu_count()
    
    print("CPUs:", n_cpu)

    env_out = make_atari_env(env_id, num_env=(n_cpu-2), seed=0)
    #env_out = bench.Monitor(env_out, log_dir, allow_early_resets=True)
    env = VecFrameStack(env_out, n_stack=4)

    def callback(_locals, _globals):
        global n_steps, best_mean_reward
        print("Step:", n_steps)

        if (n_steps + 1) % 10 == 0:
            print("Saving new model")
            _locals['self'].save(log_dir + "/models/model_{}".format(n_steps))
        n_steps += 1
        return False

    set_global_seeds(seed)
    policy = CnnPolicy
    model = PPO2(policy=policy, env=env, n_steps=2048, nminibatches=1, lam=0.95, gamma=0.99, noptepochs=10,
                 ent_coef=0.0, learning_rate=3e-4, cliprange=0.2, verbose=1, tensorboard_log=log_dir)
    model.learn(total_timesteps=num_timesteps, callback=callback)
    model.save("model_{}".format(env_id))

    return model, env

In [2]:
env_id='BreakoutNoFrameskip-v4'
num_timesteps=20000000
seed=343
best_mean_reward, n_steps = -np.inf, 0

base_dir = str(Path.home()) + '/ppo_logs'
os.makedirs(base_dir, exist_ok=True)
prev = [f for f in os.listdir(base_dir) if env_id in f]
log_dir = base_dir + '/{}-{}'.format(env_id, len(prev))
os.makedirs(log_dir, exist_ok=True)
os.makedirs(log_dir + '/models', exist_ok=True)

print('Logging to {}'.format(log_dir))

logger.configure()
model, env = train(env_id, num_timesteps, seed)

Logging to /home/nathan/ppo_logs/BreakoutNoFrameskip-v4-3
Logging to /tmp/openai-2018-11-06-10-30-31-123497
CPUs: 4


ResourceExhaustedError: OOM when allocating tensor with shape[4096,84,84,4] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node train_model/input/ToFloat}} = Cast[DstT=DT_FLOAT, SrcT=DT_UINT8, Truncate=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_train_model/input/Ob_0_7/_23)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[{{node loss/mul_4/_43}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_229_loss/mul_4", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'train_model/input/ToFloat', defined at:
  File "/home/nathan/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/nathan/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/nathan/anaconda3/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/nathan/anaconda3/lib/python3.6/asyncio/base_events.py", line 1434, in _run_once
    handle._run()
  File "/home/nathan/anaconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 1080, in __init__
    self.run()
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-62d83633c73a>", line 16, in <module>
    model, env = train(env_id, num_timesteps, seed)
  File "<ipython-input-1-e2ca4f135f77>", line 55, in train
    ent_coef=0.0, learning_rate=3e-4, cliprange=0.2, verbose=1, tensorboard_log=log_dir)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/stable_baselines/ppo2/ppo2.py", line 97, in __init__
    self.setup_model()
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/stable_baselines/ppo2/ppo2.py", line 129, in setup_model
    reuse=True)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/stable_baselines/common/policies.py", line 309, in __init__
    feature_extraction="cnn", **_kwargs)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/stable_baselines/common/policies.py", line 248, in __init__
    reuse=reuse, scale=(feature_extraction == "cnn"))
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/stable_baselines/common/policies.py", line 105, in __init__
    reuse=reuse, scale=scale)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/stable_baselines/common/policies.py", line 52, in __init__
    self.obs_ph, self.processed_x = observation_input(ob_space, n_batch, scale=scale)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/stable_baselines/common/input.py", line 26, in observation_input
    processed_x = tf.to_float(input_x)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 728, in to_float
    return cast(x, dtypes.float32, name=name)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 673, in cast
    x = gen_math_ops.cast(x, base_type, name=name)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1659, in cast
    "Cast", x=x, DstT=DstT, Truncate=Truncate, name=name)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3272, in create_op
    op_def=op_def)
  File "/home/nathan/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1768, in __init__
    self._traceback = tf_stack.extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[4096,84,84,4] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node train_model/input/ToFloat}} = Cast[DstT=DT_FLOAT, SrcT=DT_UINT8, Truncate=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_train_model/input/Ob_0_7/_23)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[{{node loss/mul_4/_43}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_229_loss/mul_4", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

