This Jupyter notebook gives a simple example of how to use the Simulated Network (asynchronous) environment. The environment is not meant as a training ground for your algorithms; it is only intended for checking whether your algorithm can be executed.

In [1]:
import gymnasium as gym
import numpy as np

# Make the parent directory importable (needed for the `Gyms` import below)
import sys
from pathlib import Path

current_dir = Path().resolve()  # Absolute path of the current working directory
root_dir = current_dir.parent
root_dir_str = str(root_dir)
# Only prepend the path once, so re-running this cell does not add duplicates
if root_dir_str not in sys.path:
    sys.path.insert(0, root_dir_str)

from Gyms.SimulatedNetwork import SimulatedNetwork

In [2]:
# Define the sizes of the state and action spaces used to build the environment
state_dim  = 4 # Dimension of the reduced state space
action_dim = 2 # Number of stimuli in the action space (each stimulus takes a value in {0,1,2,3,4})

In [3]:
# Create the environment and initialize it
env      = SimulatedNetwork(action_dim=action_dim,state_dim=state_dim)
# reset() returns a pair; only the first element (the initial state) is kept here
state, _ = env.reset()
env.render() # Prints the current state and reward, both of which are 0 right after initialization

Current state: [0. 0. 0. 0.], Reward: 0


In [4]:
# Inspect the action space: one discrete entry per stimulus (action_dim entries),
# each with 5 possible values
env.action_space

MultiDiscrete([5 5])

In [5]:
# Inspect the state (observation) space: a Box with state_dim entries
env.observation_space

Box(-1.0, 1.0, (4,), float32)

In [6]:
# Draw a random action from the action space (one integer per stimulus)
action = env.action_space.sample()
action
# The sampled action can then be applied to the environment with:
# state, reward, terminated, truncated, info = env.step(action)

array([0, 0])

In [7]:
# Shape of the sampled action: one entry per stimulus, i.e. (action_dim,)
action.shape

(2,)

In [8]:
# Apply the sampled action; step() returns the tuple
# (state, reward, terminated, truncated, info)
env.step(action)

(array([0., 0., 0., 0.]),
 0,
 False,
 False,
 {'spikes': [],
  'elecs': [],
  'action': array([0, 0]),
  'missed_cyc': 0,
  'stim_id': 1,
  'simulated': True,
  'comment': 'none'})

In [9]:
# Example code that stimulates the network with randomly sampled actions
# while also tracking the average reward received so far.

n_steps = 100  # Number of stimulations to perform

total_reward = 0
action_count = 0

for _step in range(n_steps):
    # For simplicity, choose a random action
    action = env.action_space.sample()
    print(f"Stimulate with action: {action}")

    state, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    action_count += 1

    print(f"Reward: {reward}, Avg. reward: {total_reward/action_count}")
    print(f"State: {state}")

    # If you want a more complete plotting of each step
    # env.render()

    print("-----------------------------")

    # The Gymnasium API requires reset() before stepping again once an episode
    # has ended; without this, further step() calls are undefined behaviour.
    if terminated or truncated:
        state, _ = env.reset()

Stimulate with action: [4 0]
Reward: 0, Avg. reward: 0.0
State: [ 0.5        -0.53323067  0.          0.        ]
-----------------------------
Stimulate with action: [2 0]
Reward: 0, Avg. reward: 0.0
State: [-0.5        -0.31378865  0.66022465  0.        ]
-----------------------------
Stimulate with action: [0 1]
Reward: 0, Avg. reward: 0.0
State: [-1.         -0.44056442  0.          0.        ]
-----------------------------
Stimulate with action: [2 4]
Reward: 2, Avg. reward: 0.5
State: [-0.5        -0.42792942 -0.03327589  0.        ]
-----------------------------
Stimulate with action: [3 4]
Reward: 1, Avg. reward: 0.6
State: [ 0.         -0.40983504  0.          0.        ]
-----------------------------
Stimulate with action: [2 2]
Reward: 0, Avg. reward: 0.5
State: [-0.5        -0.42266368  0.0876144   0.        ]
-----------------------------
Stimulate with action: [1 3]
Reward: -1, Avg. reward: 0.2857142857142857
State: [-1.          0.          0.28910005  0.        ]
------

In [10]:
# Inspect the info dict returned by the last step() call of the loop above
info

{'spikes': array([4.36079365, 9.55334654]),
 'elecs': array([3, 0]),
 'action': array([4, 0]),
 'missed_cyc': 0,
 'stim_id': 101,
 'simulated': True,
 'comment': 'none'}

In [11]:
# Average reward over all stimulations performed in the loop above
print(f"Average reward: {total_reward/action_count}")

Average reward: 0.36
