This Jupyter notebook shows how to create a dynamic (i.e., trained) state reduction object, how to train it, and how to use it.

In [1]:
import gymnasium as gym
import numpy as np

# Add parent directory to path
import sys
from pathlib import Path
# Resolve the working directory and put its parent at the front of sys.path
# (only once), so that packages located there can be imported.
current_dir = Path().resolve()
root_dir = current_dir.parent
root_path = str(root_dir)
if root_path not in sys.path:
    sys.path.insert(0, root_path)

from Gyms.SimulatedNetwork import SimulatedNetwork

In [2]:
# Import the state reduction function
from StateReduction.DynamicStatePCA import DynamicStatePCA

In [3]:
# Sizes of the state and action spaces
state_dim = 4   # Dimension of the reduced state space
action_dim = 5  # Number of stimuli in the action space (each stimulus needs a value in {0,1,2,3,4})

In [4]:
# Create the state reduction object, configured with the reduced state dimension
state = DynamicStatePCA(state_dim=state_dim)

In [5]:
# Create the environment, handing it the state reduction object, and initialize it
env = SimulatedNetwork(
    action_dim=action_dim,
    state_dim=state_dim,
    state_object=state,  # Use the state object
)

# Bind the initial observation to its own name: assigning it to `state` would
# overwrite the state reduction object and break this cell when it is re-run.
initial_state, _ = env.reset()

env.render()  # Shows the current state and reward, which are both 0 right after initialization

Current state: [0. 0. 0. 0.], Reward: 0


In [6]:
# Get 1000 responses to random stimuli for training
n_train_steps = 1000  # Number of random stimulations to record
spikes = []
elecs = []
for _ in range(n_train_steps):
    action = env.action_space.sample()
    # The observation gets its own name so this loop never rebinds `state`
    observation, reward, terminated, truncated, info = env.step(action)

    # Keep the per-step 'spikes' and 'elecs' entries reported in `info`
    spikes.append(info['spikes'])
    elecs.append(info['elecs'])

In [7]:
# Train the state function on the collected spikes and electrodes.
# Note that the fit is called on the environment, not on the state object directly.
env.fit(spikes,elecs)

In [8]:
# Example: stimulate the network 100 times with randomly sampled actions while
# also tracking the average reward received

n_eval_steps = 100  # Number of random stimulations in this example
total_reward = 0
action_count = 0

for _ in range(n_eval_steps):
    # For simplicity, choose a random action
    action = env.action_space.sample()
    print(f"Stimulate with action: {action}")

    # The observation gets its own name so this loop never rebinds `state`
    observation, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    action_count += 1

    print(f"Reward: {reward}, Avg. reward: {total_reward/action_count}")
    print(f"State: {observation}")

    # If you want a more complete plotting of each step
    # env.render()

    print("-----------------------------")

Stimulate with action: [3 2 1 3 2]
Reward: 1, Avg. reward: 1.0
State: [[ 0.54678344 -0.44760533 -0.34995769 -0.40822793]]
-----------------------------
Stimulate with action: [0 0 1 3 4]
Reward: 2, Avg. reward: 1.5
State: [[-0.27339561  0.03030624 -0.32796041  0.18190155]]
-----------------------------
Stimulate with action: [1 3 2 4 4]
Reward: -1, Avg. reward: 0.6666666666666666
State: [[-0.51084699 -0.82450463 -0.46722565 -0.38053875]]
-----------------------------
Stimulate with action: [0 1 2 4 4]
Reward: -1, Avg. reward: 0.25
State: [[ 0.02326778 -0.31836617 -0.59891569 -0.2656341 ]]
-----------------------------
Stimulate with action: [2 1 4 0 3]
Reward: 2, Avg. reward: 0.6
State: [[-0.04442075 -0.10680551 -0.26027669 -0.13704047]]
-----------------------------
Stimulate with action: [2 4 4 4 2]
Reward: 2, Avg. reward: 0.8333333333333334
State: [[-0.43085258  0.24221989  0.58527482 -0.17081639]]
-----------------------------
Stimulate with action: [1 1 3 2 3]
Reward: 1, Avg. rewa

In [9]:
# Report the mean reward per stimulation (total reward divided by the number of actions)
average_reward = total_reward / action_count
print(f"Average reward: {average_reward}")

Average reward: 1.21
