In [2]:
import ray 
from seagul.rl.algos import sac, ppo
from seagul.nn import MLP
from seagul.rl.models import SACModel, PPOModel 
import gym
import torch

# Start the local Ray runtime. `ignore_reinit_error=True` keeps this cell
# re-runnable: a bare `ray.init()` raises if Ray was already started in this
# kernel, which happens whenever the cell is executed a second time.
ray.init(ignore_reinit_error=True)

2020-01-13 15:08:38,353	INFO resource_spec.py:205 -- Starting Ray with 4.44 GiB memory available for workers and up to 2.22 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).


{'node_ip_address': '192.168.0.12',
 'redis_address': '192.168.0.12:45857',
 'object_store_address': '/tmp/ray/session_2020-01-13_15-08-38_352885_15543/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-01-13_15-08-38_352885_15543/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2020-01-13_15-08-38_352885_15543'}

In [25]:
# Network dimensions. 3 matches the length of the observation that
# env.reset() prints for Pendulum-v0 later in this notebook; 1 is presumably
# the action dimension — TODO confirm against env.action_space.
input_size = 3
output_size = 1
layer_size = 64
num_layers = 2


# The policy output is twice the action size — presumably a mean and a
# (log-)std per action dimension; verify against SACModel.
policy = MLP(input_size, output_size*2, num_layers, layer_size)
value_fn = MLP(input_size, 1, num_layers, layer_size)
# Both Q-networks have input width = observation size + action size.
q1_fn = MLP(input_size + output_size, 1, num_layers, layer_size)
q2_fn = MLP(input_size + output_size, 1, num_layers, layer_size)
# NOTE(review): the trailing 3 is an unexplained magic number (possibly an
# action limit) — confirm against the SACModel signature and give it a name.
model = SACModel(policy, value_fn, q1_fn, q2_fn, 3)

@ray.remote
def do_rollout(env, model, num_steps):
    """Ray task: run one episode on a worker by delegating to ``do_rollout_local``.

    The rollout loop used to be duplicated verbatim here and in
    ``do_rollout_local``. Delegating keeps the remote and the local code paths
    identical by construction, so differences between their results can only
    come from where they run, never from the two copies drifting apart.

    Args:
        env: environment exposing ``action_space.shape``, ``reset()`` and
            ``step(action)`` returning ``(obs, rew, done, info)``.
        model: object exposing ``select_action(obs, noise)`` that returns a
            pair whose first element is the action tensor.
        num_steps: step index from which the recorded done flag is forced to
            ``False``. It does not limit the episode length.

    Returns:
        Tuple ``(ep_obs1, ep_obs2, ep_acts, ep_rews, ep_done)`` of stacked
        tensors with one entry per environment step.
    """
    # `do_rollout_local` is defined further down in this cell, so the name
    # does not exist yet when this function is created.
    # NOTE(review): this relies on Ray serializing the task function lazily
    # (at the first `.remote()` call, after the whole cell has executed) —
    # confirm for the Ray version in use.
    return do_rollout_local(env, model, num_steps)

def do_rollout_local(env, model, num_steps):
    """Play one episode of ``env`` with ``model`` in the current process.

    The episode runs until ``env.step`` reports ``done``. Every step draws
    fresh standard-normal noise of shape ``(1, act_size)`` and passes it,
    together with the current observation reshaped to ``(1, -1)``, to
    ``model.select_action``.

    Args:
        env: environment exposing ``action_space.shape``, ``reset()`` and
            ``step(action)`` returning ``(obs, rew, done, info)``.
        model: object exposing ``select_action(obs, noise)`` that returns a
            pair whose first element is the action tensor.
        num_steps: step index from which the recorded done flag is forced to
            ``False``; steps with index ``< num_steps`` record the
            environment's own flag.

    Returns:
        Tuple ``(ep_obs1, ep_obs2, ep_acts, ep_rews, ep_done)``:
        observations before each step, observations after each step, the
        actions taken (float32), rewards reshaped to ``(-1, 1)`` and done
        flags reshaped to ``(-1, 1)``.

    NOTE(review): ``num_steps`` does not bound the loop, so an environment
    that never reports ``done`` makes this run forever. Also, if the intent
    is to mask time-limit terminations, the ``<`` comparison looks off by one
    when ``num_steps`` equals the environment's own step limit — confirm.
    """
    f32 = torch.float32

    def as_f32(value):
        # Convert whatever the env hands back into a detached float32 tensor.
        return torch.as_tensor(value, dtype=f32).detach()

    # One list per returned tensor; each gains one entry per environment step.
    before, after, actions, rewards, flags = [], [], [], [], []

    n_act = env.action_space.shape[0]
    current = as_f32(env.reset())
    step_idx = 0
    finished = False

    while not finished:
        before.append(current.clone())

        eps = torch.randn(1, n_act)
        action, _ = model.select_action(current.reshape(1, -1), eps)
        action = action.detach()

        # The env receives the action as a flat numpy array.
        raw_next, reward, finished, _ = env.step(action.numpy().reshape(-1))
        current = as_f32(raw_next)

        actions.append(torch.as_tensor(action.clone(), dtype=f32))
        rewards.append(torch.as_tensor(reward, dtype=f32))
        after.append(current.clone())

        # Steps at index >= num_steps always record False, whatever the env said.
        flags.append(torch.as_tensor(finished if step_idx < num_steps else False))
        step_idx += 1

    ep_obs1 = torch.stack(before)
    ep_obs2 = torch.stack(after)
    ep_acts = torch.stack(actions)
    ep_rews = torch.stack(rewards).reshape(-1, 1)
    ep_done = torch.stack(flags).reshape(-1, 1)

    # Mean squared observation, printed as a quick fingerprint of the episode.
    print(torch.pow(ep_obs1,2).mean())
    return (ep_obs1, ep_obs2, ep_acts, ep_rews, ep_done)




def do_calc_local(env):
    """Print ``env``, reset it, then print and return the observation from ``reset()``.

    Runs in the calling process; used as a minimal probe of what the
    environment yields on reset.
    """
    print(env)
    first_obs = env.reset()
    print(first_obs)
    return first_obs

@ray.remote
def do_calc(env):
    """Ray task: run the ``do_calc_local`` reset probe on a worker.

    Prints ``env`` and the observation from ``env.reset()`` on the worker and
    returns that observation. Delegates to ``do_calc_local`` instead of
    repeating its body so the remote and local probes cannot diverge.
    """
    return do_calc_local(env)

In [26]:
# Single environment instance shared by the cells below; it is passed as an
# argument both to the local helpers and to the Ray tasks.
env_name = "Pendulum-v0"
env = gym.make(env_name)

In [50]:
#%%timeit
# Submit one rollout as a Ray task and block until its result is available.
futures = [do_rollout.remote(env, model,200) for _ in range(1)]
a = ray.get(futures)

# a[0] is the first task's result tuple and a[0][0] its first element
# (ep_obs1); print the mean of its squared entries. The task prints the same
# quantity itself from the worker process.
print(torch.pow(a[0][0],2).mean())

tensor(2.4565)


In [51]:
#%%timeit
# Same rollout, run in the notebook process for comparison with the Ray task.
# NOTE(review): num_steps is 15000 here but 200 in the remote call; in
# do_rollout_local it only affects the recorded done flags, not how long the
# episode runs. No random seed is set anywhere in the visible cells, so the
# printed statistic is not expected to match the remote run.
for _ in range(1):
    b = do_rollout_local(env, model, 15000)
print(torch.pow(b[0],2).mean())

[2m[36m(pid=15594)[0m tensor(2.4565)
tensor(1.7422)
tensor(1.7422)


In [7]:
# Reset probe in the notebook process: prints the env and its reset
# observation; the returned observation is shown as the cell's output.
do_calc_local(env)

<TimeLimit<PendulumEnv<Pendulum-v0>>>
[-0.01198911 -0.99992813 -0.79208164]


array([-0.01198911, -0.99992813, -0.79208164])

In [8]:
# Same reset probe, executed as a Ray task; ray.get blocks until the worker
# returns the observation.
future = do_calc.remote(env)
print(ray.get(future))



[-0.88505316  0.46548996  0.87610353]
[2m[36m(pid=15594)[0m <TimeLimit<PendulumEnv<Pendulum-v0>>>
[2m[36m(pid=15594)[0m [-0.88505316  0.46548996  0.87610353]
