# Imports

In [25]:
import numpy as np
import gym
from osim.env import L2RunEnv
from replay_buffer import *

# Replay buffer

In [26]:
class L2RubWrap:
    """Thin adapter around ``L2RunEnv`` that returns observations as NumPy arrays."""

    def __init__(self):
        # The wrapped environment is created with rendering switched off.
        self.env = L2RunEnv(visualize=False)

    def reset(self):
        """Reset the wrapped environment and return its observation as an ndarray."""
        first_obs = self.env.reset()
        return np.array(first_obs)

    def step(self, a):
        """Apply action ``a`` to the wrapped environment.

        Returns:
            Tuple ``(next_obs, reward, done, info)`` where ``next_obs`` has been
            converted to an ndarray and the other three items are passed
            through unchanged from the wrapped environment.
        """
        raw_obs, reward, done, info = self.env.step(a)
        return np.array(raw_obs), reward, done, info

In [27]:
# Build the wrapped environment (L2RubWrap constructs an L2RunEnv with visualize=False).
env = L2RubWrap()

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [48]:
# Server-side buffer that receives complete episodes.
# NOTE(review): the positional 23 is defined by replay_buffer.ServerBuffer — presumably a capacity; confirm.
sb = ServerBuffer(23, history_len=2, num_of_parts_in_obs=2)

In [49]:
# Collect three short random-action episodes and push each one into the server buffer.
for j in range(3):
    # A fresh agent-side buffer per episode.
    # NOTE(review): 13 / history_len=3 differ from the ServerBuffer settings (23 / history_len=2) — confirm intended.
    ab = AgentBuffer(13, history_len=3, num_of_parts_in_obs=2)
    obs = env.reset()
    # Each observation has two parts: the environment observation and a random 5x5
    # array (presumably a placeholder for a second input such as an image — confirm).
    ab.push_init_observation([obs, np.random.random((5, 5))])
    for i in range(10):  # at most 10 steps per episode
        # Random action: 18 values drawn uniformly from [0, 0.7).
        action = np.random.uniform(0, 0.7, size=18)
        next_obs, reward, done, _ = env.step(action)
        trans = [[next_obs, np.random.random((5, 5))], action, reward, done]
        ab.push_transition(trans)
        if done: break
    # `episode` stays bound to the last collected episode; later cells reuse it.
    episode = ab.get_complete_episode()
    sb.push_episode(episode)

In [50]:
# Draw a batch from the server buffer; the argument (3) appears as the leading
# dimension of every array in the batch (presumably the batch size — confirm).
batch = sb.get_batch(3)

In [51]:
# Shape of the first observation part of the sampled states, before flattening.
batch.s[0].shape

(3, 1, 41)

In [52]:
# Flatten every state part to 2-D: keep the leading axis, merge all remaining axes.
for i, part in enumerate(batch.s):
    shape = part.shape
    new_shape = (shape[0], -1)
    batch.s[i] = part.reshape(new_shape)

In [53]:
# Shape of the first observation part after the flattening step above.
batch.s[0].shape

(3, 41)

In [7]:
# Report the shape of every component of the sampled batch.
num_parts = len(batch.s)
for i in range(num_parts):
    part_shape = batch.s[i].shape
    print ('Part '+str(i+1) + ' of the batch.s has shape: ', part_shape)
print ('batch.a has shape: ', batch.a.shape)
print ('batch.r has shape: ', batch.r.shape)
# The next-state parts are indexed over the same number of parts as batch.s.
for i in range(num_parts):
    next_part_shape = batch.s_[i].shape
    print ('Part '+str(i+1) + ' of the batch.s_ has shape: ', next_part_shape)
print ('batch.done has shape: ', batch.done.shape)

Part 1 of the batch.s has shape:  (3, 2, 41)
Part 2 of the batch.s has shape:  (3, 2, 5, 5)
batch.a has shape:  (3, 18)
batch.r has shape:  (3,)
Part 1 of the batch.s_ has shape:  (3, 2, 41)
Part 2 of the batch.s_ has shape:  (3, 2, 5, 5)
batch.done has shape:  (3,)


# Data transmission between client and server

In [17]:
import zlib
import marshal

# marshal was the fastest option for serializing, sending and receiving data.
# Alternatives that were also tested:
# - json
# - pickle

def serialize(object):
    """Marshal ``object`` (marshal format version 2) and zlib-compress the result.

    Returns:
        The compressed payload as ``bytes``.
    """
    marshalled = marshal.dumps(object, 2)
    return zlib.compress(marshalled)

def deserialize(bytes):
    """Inverse of ``serialize``: zlib-decompress ``bytes`` and unmarshal the result.

    NOTE(review): the ``marshal`` documentation warns that the module is not
    secure against erroneous or maliciously constructed data; only feed this
    function payloads from trusted peers.
    """
    decompressed = zlib.decompress(bytes)
    return marshal.loads(decompressed)

In [18]:
def obs_to_string(observations):
    """Encode each observation part as a flat byte string of float32 values.

    Args:
        observations: iterable of array-likes, one entry per observation part.

    Returns:
        A list of ``bytes`` objects, one per part, in the same order.
    """
    # ``ndarray.tostring`` is deprecated and removed in NumPy 2.0; ``tobytes``
    # produces the same bytes.  The cast to float32 keeps the payload in the
    # dtype that ``string_to_obs`` decodes with, so a float64 input no longer
    # round-trips into garbage.  float32 inputs are encoded exactly as before.
    return [
        np.asarray(obs, dtype=np.float32).reshape(-1).tobytes()
        for obs in observations
    ]

def episode_to_req(episode, method='store_exp_batch'):
    """Pack an episode into a serialized request payload.

    Args:
        episode: sequence ``(observations, actions, rewards, dones)``; the
            last three must provide ``.tolist()``.
        method: value stored under the ``'method'`` key of the request.

    Returns:
        The result of ``serialize`` applied to the request dictionary.
    """
    observations, actions, rewards, dones = episode
    payload = {
        'method': method,
        'observations': obs_to_string(observations),
        'actions': actions.tolist(),
        'rewards': rewards.tolist(),
        'dones': dones.tolist(),
    }
    return serialize(payload)

In [19]:
# Encode the last episode collected above into a compressed request payload.
req = episode_to_req(episode)

In [20]:
def string_to_obs(strings, obs_shapes):
    """Decode byte strings back into float32 observation arrays.

    Args:
        strings: sequence of byte buffers, one per observation part.
        obs_shapes: sequence of tuples; ``obs_shapes[i]`` is the per-step
            shape of part ``i``.

    Returns:
        A list of arrays; entry ``i`` has shape ``(-1,) + obs_shapes[i]``,
        i.e. a leading axis inferred from the buffer length.
    """
    return [
        np.frombuffer(raw, dtype=np.float32).reshape((-1,) + obs_shapes[idx])
        for idx, raw in enumerate(strings)
    ]

def req_to_episode(request, obs_shapes):
    """Decode a serialized request back into an episode.

    Args:
        request: payload produced by ``episode_to_req``.
        obs_shapes: per-part observation shapes, forwarded to ``string_to_obs``.

    Returns:
        List ``[observations, actions, rewards, dones]`` where ``observations``
        is the list returned by ``string_to_obs``, ``actions`` and ``rewards``
        are float32 arrays and ``dones`` is a boolean array.
    """
    req = deserialize(request)
    observations = string_to_obs(req['observations'], obs_shapes)
    actions = np.array(req['actions'], dtype=np.float32)
    rewards = np.array(req['rewards'], dtype=np.float32)
    # The ``np.bool`` alias was deprecated in NumPy 1.20 and removed in 1.24,
    # where it raises AttributeError; ``np.bool_`` is the stable scalar type.
    dones = np.array(req['dones'], dtype=np.bool_)
    return [observations, actions, rewards, dones]

In [21]:
# Decode the request; obs_shapes gives the per-step shape of each observation
# part (a 41-element vector and a 5x5 array).
episode_restored = req_to_episode(req, obs_shapes=[(41,), (5,5)])

In [22]:
# Compare every component of the original episode with its decoded counterpart.
names = ['Actions', 'Rewards', 'Dones']
for i in range(len(episode[0])):
    sent_part, received_part = episode[0][i], episode_restored[0][i]
    check = np.all(sent_part==received_part)
    print ('Part '+str(i+1)+' of observations transmitted succesfully:', check)
# Entries 1..3 of an episode are the actions, rewards and done flags.
for i, label in enumerate(names):
    check = np.all(episode[i+1]==episode_restored[i+1])
    print (label+' transmitted succesfully:', check)

Part 1 of observations transmitted succesfully: True
Part 2 of observations transmitted succesfully: True
Actions transmitted succesfully: True
Rewards transmitted succesfully: True
Dones transmitted succesfully: True


In [23]:
# Done flags of the original episode (fourth element of the episode list).
episode[3]

array([False, False, False, False, False, False, False, False, False,
       False])

In [24]:
# Done flags after the serialize/deserialize round trip, for visual comparison.
episode_restored[3]

array([False, False, False, False, False, False, False, False, False,
       False])