This Jupyter notebook shows how to create a static (i.e., untrained) state reduction object and how to use it together with the simulated network environment.

In [1]:
import gymnasium as gym
import numpy as np

# Add parent directory to path
import sys
from pathlib import Path
# Put the parent of the current working directory at the front of the import
# search path (unless it is already listed), presumably so that the
# project-local packages imported below (e.g. `Gyms`) can be found.
current_dir = Path.cwd().resolve()
root_dir = current_dir.parent
root_entry = str(root_dir)
if not sys.path.count(root_entry):
    sys.path.insert(0, root_entry)

from Gyms.SimulatedNetwork import SimulatedNetwork

In [2]:
# Import the state reduction class
from StateReduction.StaticStateSimple import StaticStateSimple

In [3]:
# Sizes of the state and action spaces:
#   state_dim  - dimension of the reduced state space
#   action_dim - number of stimuli in the action space
#                (each stimulus needs a value of {0,1,2,3,4})
state_dim, action_dim = 4, 2

In [4]:
# Instantiate the static (i.e. untrained) state reduction object.
# `state_dim` is the dimension of the reduced state space defined above.
state = StaticStateSimple(state_dim=state_dim)

In [5]:
# Create the environment and hand it the state reduction object
env = SimulatedNetwork(action_dim=action_dim,
                       state_dim=state_dim,
                       state_object=state)  # Use the state object

# reset() returns an (observation, info) pair. The observation gets its own
# name instead of `state`: rebinding `state` here would overwrite the state
# reduction object, and re-running this cell would then pass an observation
# array as `state_object`.
observation, _ = env.reset()

# render() shows the current state and reward, which are both 0 right after initialization
env.render()

Current state: [0. 0. 0. 0.], Reward: 0


In [6]:
# Draw one random action from the environment's action space.
# The sampled action could then be applied to the environment with
#     env.step(action)
# which returns (observation, reward, terminated, truncated, info).
action = env.action_space.sample()

# Last expression of the cell, so the sampled action is displayed as output
action

array([1, 1])

In [7]:
# Example: stimulate the network repeatedly with randomly sampled actions
# while tracking the running average of the received reward.

n_steps = 100  # Number of stimulations to apply

total_reward = 0
action_count = 0

for _ in range(n_steps):
    # For simplicity, choose a random action
    action = env.action_space.sample()
    print(f"Stimulate with action: {action}")

    # Keep the returned observation under its own name: rebinding `state`
    # here would overwrite the state reduction object created earlier and
    # break a re-run of the environment-creation cell.
    observation, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    action_count += 1

    print(f"Reward: {reward}, Avg. reward: {total_reward/action_count}")
    print(f"State: {observation}")

    # If you want a more complete plotting of each step
    # env.render()

    print("-----------------------------")

    # The Gymnasium API requires a reset() once an episode has ended;
    # stepping a terminated or truncated environment is not defined.
    if terminated or truncated:
        observation, info = env.reset()

Stimulate with action: [1 2]
Reward: 0, Avg. reward: 0.0
State: [-1.         -0.37667887  0.          0.        ]
-----------------------------
Stimulate with action: [3 0]
Reward: 1, Avg. reward: 0.5
State: [-0.5        -0.22294582 -0.37913153  0.        ]
-----------------------------
Stimulate with action: [2 3]
Reward: 1, Avg. reward: 0.6666666666666666
State: [-0.5        -0.40934919  0.          0.        ]
-----------------------------
Stimulate with action: [2 2]
Reward: 0, Avg. reward: 0.5
State: [-0.5        -0.29849402  0.14331208  0.        ]
-----------------------------
Stimulate with action: [2 1]
Reward: 1, Avg. reward: 0.6
State: [-0.5        -0.42786707  0.          0.        ]
-----------------------------
Stimulate with action: [2 3]
Reward: 1, Avg. reward: 0.6666666666666666
State: [-0.5        -0.41281963  0.          0.        ]
-----------------------------
Stimulate with action: [2 1]
Reward: 1, Avg. reward: 0.7142857142857143
State: [-0.5       -0.3996837  0. 

In [8]:
# Overall mean reward across all stimulations applied in the loop above
average_reward = total_reward / action_count
print(f"Average reward: {average_reward}")

Average reward: 0.43
