In [40]:
import random
import numpy as np
import grpc

from communication_objects.all_pb2 import (
    ListOfInts, ListOfFloats,
    EnvironmentCommand, 
    EnvStepInput,
    AgentState, AgentMetrics, EnvStepOutput
)
import communication_objects.all_pb2_grpc

In [41]:
def run(target='localhost:50051'):
    """Send a single STEP request with random actions to the Environment service.

    Opens an insecure (plaintext) gRPC channel to ``target``, builds an
    ``EnvStepInput`` containing one action entry for agent ``'0'`` (five
    floats from ``np.random.rand``) and one for agent ``'1'`` (three ints
    from ``np.random.randint(1, 5)``, i.e. 1..4), calls the ``Step`` RPC
    and returns its reply. The channel is closed when the function returns.

    Args:
        target: ``host:port`` address of the Environment gRPC server.
            Defaults to the previously hard-coded ``'localhost:50051'`` so
            existing ``run()`` calls behave as before.

    Returns:
        The response message returned by ``stub.Step`` (presumably an
        ``EnvStepOutput`` -- confirm against the service definition).

    Note:
        No error handling is done here; a failing RPC (for example when no
        server is listening on ``target``) propagates to the caller.
    """
    with grpc.insecure_channel(target) as channel:
        stub = communication_objects.all_pb2_grpc.EnvironmentStub(channel)

        env_in = EnvStepInput()

        # Agent '0' gets a float-valued action.
        action = env_in.agent_actions['0'].data.add()
        action.values_floats.data.extend(np.random.rand(5))

        # Agent '1' gets an int-valued action.
        action = env_in.agent_actions['1'].data.add()
        action.values_ints.data.extend([np.random.randint(1, 5) for _ in range(3)])

        env_in.command = EnvironmentCommand.STEP

        return stub.Step(env_in)

In [44]:
# Perform one Step round-trip via run() and keep the server's reply in `state`.
# NOTE(review): run() opens a channel to a gRPC server, so one must be
# listening before this cell is executed.
state = run()
# Bare last expression: the notebook renders the reply message as cell output.
state

agent_states {
  key: "0"
  value {
    data {
      next_observation {
        data: 0.31980186700820923
        data: 0.3058376908302307
        data: 0.21682040393352509
        data: 0.8762670755386353
        data: 0.9432609677314758
      }
      reward: 0.08085587620735168
    }
  }
}
agent_states {
  key: "1"
  value {
    data {
      next_observation {
        data: 0.22986407577991486
        data: 0.3545733094215393
        data: 0.1839819848537445
        data: 0.06244391202926636
        data: 0.9003799557685852
      }
      reward: 0.7044233083724976
    }
  }
}
agent_metrics {
  key: "0"
  value {
    data {
      steps_per_episode: 1.0
      step_count: 2.0
      temp_done_counter: 3.0
      done_count: 4.0
      reward_per_step: 5.0
      reward_per_episode: 6.0
      reward_total: 7.0
    }
  }
}

# Client side

In [37]:
# Assemble a STEP request that carries one action entry for each of two agents.
env_in = EnvStepInput(command=EnvironmentCommand.STEP)

# Agent '0': a float-valued action of five samples from np.random.rand.
action = env_in.agent_actions['0'].data.add()
action.values_floats.data.extend(np.random.rand(5))

# Agent '1': an int-valued action of three draws from np.random.randint(1, 5).
action = env_in.agent_actions['1'].data.add()
action.values_ints.data.extend(np.random.randint(1, 5) for _ in range(3))

# Bare last expression so the notebook displays the populated message.
env_in

agent_actions {
  key: "0"
  value {
    data {
      values_floats {
        data: 0.3700660169124603
        data: 0.09750878810882568
        data: 0.3732461631298065
        data: 0.10502585768699646
        data: 0.9162991046905518
      }
    }
  }
}
agent_actions {
  key: "1"
  value {
    data {
      values_ints {
        data: 4
        data: 3
        data: 3
      }
    }
  }
}

# Server side

In [38]:
# Build a stand-alone AgentState filled with random values.
a = AgentState()
a.next_observation.data.extend(np.random.rand(5))
a.reward = np.random.rand(1)[0]
# random.randint includes BOTH endpoints: (0, 1) is a fair coin flip, whereas
# the former (0, 2) could also return 2 and made `done` True about 2/3 of the
# time. This now matches how `done` is drawn for the EnvStepOutput entries.
a.done = bool(random.randint(0, 1))
# Bare last expression so the notebook displays the message.
a

next_observation {
  data: 0.5468494892120361
  data: 0.01567339338362217
  data: 0.9128026366233826
  data: 0.9781962037086487
  data: 0.5143204927444458
}
reward: 0.1130015030503273
done: true

In [39]:
# Mock server reply: one state entry each for agents '0' and '1', plus one
# metrics entry for agent '0'.
env_state = EnvStepOutput()

# Agent '0' state: random observation, random reward, coin-flip `done`.
a = env_state.agent_states['0'].data.add()
a.next_observation.data.extend(np.random.rand(5))
a.reward = np.random.rand(1).item()
a.done = random.randint(0, 1) == 1

# Agent '1' state: built the same way.
a = env_state.agent_states['1'].data.add()
a.next_observation.data.extend(np.random.rand(5))
a.reward = np.random.rand(1).item()
a.done = random.randint(0, 1) == 1

# Agent '0' metrics: distinct placeholder values 1..7 so each field is easy
# to tell apart in the printed message.
m = env_state.agent_metrics['0'].data.add(
    steps_per_episode=1,
    step_count=2,
    temp_done_counter=3,
    done_count=4,
    reward_per_step=5,
    reward_per_episode=6,
    reward_total=7,
)

# Bare last expression so the notebook displays the populated message.
env_state

agent_states {
  key: "0"
  value {
    data {
      next_observation {
        data: 0.28972455859184265
        data: 0.5487870573997498
        data: 0.6819930672645569
        data: 0.8341315984725952
        data: 0.4603029191493988
      }
      reward: 0.4974982440471649
    }
  }
}
agent_states {
  key: "1"
  value {
    data {
      next_observation {
        data: 0.5511258244514465
        data: 0.7156957983970642
        data: 0.36487412452697754
        data: 0.7045494914054871
        data: 0.31693553924560547
      }
      reward: 0.6016888618469238
      done: true
    }
  }
}
agent_metrics {
  key: "0"
  value {
    data {
      steps_per_episode: 1.0
      step_count: 2.0
      temp_done_counter: 3.0
      done_count: 4.0
      reward_per_step: 5.0
      reward_per_episode: 6.0
      reward_total: 7.0
    }
  }
}