This Jupyter notebook shows you how to create a dynamic (i.e., trained) state reduction object, how to train it, and how to use it.

In [1]:
import gymnasium as gym
import numpy as np

# Make the parent directory importable so that the project packages
# (which live one level above this notebook) can be imported below.
import sys
from pathlib import Path

current_dir = Path().resolve()   # directory the notebook is run from
root_dir = current_dir.parent    # one level up from the working directory
_root_entry = str(root_dir)
# Only prepend once, so re-running this cell does not grow sys.path.
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

from Gyms.SimulatedNetwork import SimulatedNetwork

In [2]:
# Import the state reduction function
from StateReduction.DynamicStatePCA import DynamicStatePCA

In [3]:
# Sizes of the state and action spaces used by the cells below
state_dim: int = 4   # dimension of the reduced state space
action_dim: int = 2  # number of stimuli in the action space (each stimulus needs a value in {0,1,2,3,4})

In [4]:
# Create the state reduction object. `state_dim` is the dimension of the
# reduced state space. The object is handed to the environment in the next
# cell and is trained later through `env.fit`.
state = DynamicStatePCA(state_dim=state_dim)

In [5]:
# Create the environment, hand it the state reduction object, and initialize it
env      = SimulatedNetwork(action_dim=action_dim,
                            state_dim=state_dim,
                            state_object=state) # Use the state object
# Bind the initial observation to its own name: assigning it to `state` would
# overwrite the state reduction object, so re-running this cell would pass an
# observation array as `state_object`.
initial_state, _ = env.reset()
env.render() # Prints the current state and reward, both of which are 0 right after initialization

Current state: [0. 0. 0. 0.], Reward: 0


In [6]:
# Collect responses to random stimuli; they are used to train the state function
n_training_steps = 1000  # number of random stimulations to record

spikes    = []
elecs     = []
for _ in range(n_training_steps):
    action = env.action_space.sample()
    # env.step returns (state, reward, terminated, truncated, info). Only `info`
    # is needed here, and discarding the rest avoids overwriting the `state`
    # reduction object with an observation.
    *_, info = env.step(action)

    spikes.append(info['spikes'])
    elecs.append(info['elecs'])

In [7]:
# Train the state function on the recorded responses.
# Note that training goes through the environment (`env.fit`), not through the
# state object directly.
env.fit(spikes,elecs)

In [8]:
# Example: stimulate the network repeatedly with randomly sampled actions and
# keep track of the average reward received so far.
n_eval_steps = 100  # number of stimulations in this example run

total_reward = 0
action_count = 0

for _ in range(n_eval_steps):
    # For simplicity, choose a random action
    action = env.action_space.sample()
    print(f"Stimulate with action: {action}")

    # The observation gets its own name so it does not shadow the `state`
    # reduction object created earlier in the notebook.
    observation, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    action_count += 1

    print(f"Reward: {reward}, Avg. reward: {total_reward/action_count}")
    print(f"State: {observation}")

    # If you want a more complete plotting of each step
    # env.render()

    print("-----------------------------")

Stimulate with action: [1 4]
Reward: -1, Avg. reward: -1.0
State: [[ 0.30628152 -0.15553553  0.00276272  0.36216179]]
-----------------------------
Stimulate with action: [1 3]
Reward: 0, Avg. reward: -0.5
State: [[ 0.09040817 -0.16395798 -0.31297451  0.0827318 ]]
-----------------------------
Stimulate with action: [2 1]
Reward: 1, Avg. reward: 0.0
State: [[-0.31802051  0.04071407  0.13242686 -0.03005415]]
-----------------------------
Stimulate with action: [4 2]
Reward: -1, Avg. reward: -0.25
State: [[-0.47854874 -0.06397495 -0.50128402  0.23572251]]
-----------------------------
Stimulate with action: [4 0]
Reward: 0, Avg. reward: -0.2
State: [[-0.24976675 -0.13220401 -0.44660677 -0.41375846]]
-----------------------------
Stimulate with action: [2 4]
Reward: -1, Avg. reward: -0.3333333333333333
State: [[-0.34507465  0.15729278 -0.22463936  0.35917431]]
-----------------------------
Stimulate with action: [2 4]
Reward: 2, Avg. reward: 0.0
State: [[-0.25053609 -0.03495259  0.0589559

In [9]:
# Mean reward over all stimulation steps of the example run above
average_reward = total_reward/action_count
print(f"Average reward: {average_reward}")

Average reward: 0.24
