In [1]:
import sys, os
from pathlib import Path
# Append the directory two levels above the current working directory to
# sys.path. NOTE(review): presumably this is the repository root that holds
# the `shiva` package imported in the next cell — confirm against the layout.
sys.path.append(str(Path(os.path.abspath('')).absolute().parent.parent))

In [13]:
import random
import numpy as np
import grpc

from shiva.core.communication_objects.env_command_pb2 import EnvironmentCommand
from shiva.core.communication_objects.env_step_pb2 import ( EnvStepInput, EnvStepOutput )
from shiva.core.communication_objects.env_specs_pb2 import EnvironmentSpecs
from shiva.core.communication_objects.env_metrics_pb2 import AgentMetrics
from shiva.core.communication_objects.agent_state_pb2 import AgentState
from shiva.core.communication_objects.service_env_pb2_grpc import EnvironmentStub, EnvironmentServicer, add_EnvironmentServicer_to_server

In [19]:
def run(address='localhost:50051'):
    """Send a single STEP request with random actions to an Environment server.

    The request holds one action entry for agent '0' (five values from
    ``np.random.rand``) and one for agent '1' (three values from
    ``np.random.randint(1, 5)``), with the command set to
    ``EnvironmentCommand.STEP``.

    Args:
        address: ``host:port`` target of the gRPC server. Defaults to
            'localhost:50051', the value that used to be hard-coded, so
            existing ``run()`` calls behave as before.

    Returns:
        The message returned by ``stub.Step`` for the request.
    """
    # Build the request first so the channel is only held open for the call.
    env_in = EnvStepInput()

    action = env_in.agent_actions['0'].data.add()
    action.data.extend(np.random.rand(5))

    action = env_in.agent_actions['1'].data.add()
    action.data.extend([np.random.randint(1, 5) for _ in range(3)])

    env_in.command = EnvironmentCommand.STEP

    # The with-block closes the channel once the call has returned.
    with grpc.insecure_channel(address) as channel:
        stub = EnvironmentStub(channel)
        return stub.Step(env_in)

In [24]:
# Call the server five times in a row and keep every reply for inspection.
s = list()
for _ in range(5):
    state = run()
    s += [state]

In [26]:
# Display the first reply collected by the loop above.
s[0]

agent_states {
  key: "0"
  value {
    data {
      next_observation {
        data: 0.6423773169517517
        data: 0.926799476146698
        data: 0.3249926269054413
        data: 0.9849494099617004
        data: 0.6095330119132996
      }
      reward: 0.24065637588500977
    }
  }
}
agent_states {
  key: "1"
  value {
    data {
      next_observation {
        data: 0.7816311717033386
        data: 0.38939034938812256
        data: 0.5860176086425781
        data: 0.5583418011665344
        data: 0.7874003648757935
      }
      reward: 0.9832462072372437
    }
  }
}
agent_metrics {
  key: "0"
  value {
    data {
      steps_per_episode: 1.0
      step_count: 2.0
      temp_done_counter: 3.0
      done_count: 4.0
      reward_per_step: 5.0
      reward_per_episode: 6.0
      reward_total: 7.0
    }
  }
}

# Client side

In [27]:
# Assemble by hand the request a client sends for one environment step.
env_in = EnvStepInput()
env_in.command = EnvironmentCommand.STEP

# Agent '0': one action entry filled with five random floats in [0, 1).
action = env_in.agent_actions['0'].data.add()
action.data.extend(np.random.rand(5))

# Agent '1': one action entry filled with three random integers from 1 to 4.
action = env_in.agent_actions['1'].data.add()
action.data.extend([np.random.randint(1, 5) for _ in range(3)])

# Last expression of the cell: show the assembled message.
env_in

agent_actions {
  key: "0"
  value {
    data {
      data: 0.22562913596630096
      data: 0.06373032927513123
      data: 0.3242224156856537
      data: 0.4104943871498108
      data: 0.377702534198761
    }
  }
}
agent_actions {
  key: "1"
  value {
    data {
      data: 2.0
      data: 2.0
      data: 2.0
    }
  }
}

# Server side

In [28]:
# Build a standalone AgentState with a random observation, reward and done flag.
a = AgentState()
a.next_observation.data.extend(np.random.rand(5))
a.reward = np.random.rand(1)[0]
# random.randint includes BOTH endpoints, so randint(0, 2) yields 0, 1 or 2 and
# made `done` true two times out of three. Use (0, 1) for a fair coin flip,
# matching the EnvStepOutput cell below.
a.done = bool(random.randint(0, 1))
a

next_observation {
  data: 0.15968164801597595
  data: 0.6550961136817932
  data: 0.5685981512069702
  data: 0.08132286369800568
  data: 0.7154539823532104
}
reward: 0.5849597454071045
done: true

In [29]:
# Assemble by hand the reply a server sends back after one environment step.
env_state = EnvStepOutput()

# Agents '0' and '1' each get one state entry: five random observation values,
# a random reward and a coin-flip done flag.
for agent_id in ('0', '1'):
    a = env_state.agent_states[agent_id].data.add()
    a.next_observation.data.extend(np.random.rand(5))
    a.reward = np.random.rand(1)[0]
    a.done = bool(random.randint(0, 1))

# Only agent '0' gets a metrics entry; the values 1..7 are placeholders that
# make each field easy to spot in the printed message.
m = env_state.agent_metrics['0'].data.add()
m.steps_per_episode = 1
m.step_count = 2
m.temp_done_counter = 3
m.done_count = 4
m.reward_per_step = 5
m.reward_per_episode = 6
m.reward_total = 7

# Last expression of the cell: show the assembled message.
env_state

agent_states {
  key: "0"
  value {
    data {
      next_observation {
        data: 0.0219777449965477
        data: 0.14421477913856506
        data: 0.25505006313323975
        data: 0.9593082666397095
        data: 0.5424119234085083
      }
      reward: 0.8846791386604309
      done: true
    }
  }
}
agent_states {
  key: "1"
  value {
    data {
      next_observation {
        data: 0.021517086774110794
        data: 0.9671235084533691
        data: 0.3702682554721832
        data: 0.6292982697486877
        data: 0.6549540758132935
      }
      reward: 0.11284598708152771
      done: true
    }
  }
}
agent_metrics {
  key: "0"
  value {
    data {
      steps_per_episode: 1.0
      step_count: 2.0
      temp_done_counter: 3.0
      done_count: 4.0
      reward_per_step: 5.0
      reward_per_episode: 6.0
      reward_total: 7.0
    }
  }
}