# Polebalancing using NESTML

In this tutorial, we are going to build an agent that can successfully solve the classic pole balancing problem using reinforcement learning. We will start with a standard temporal difference learning approach and after that, use NESTML to set up a spiking neural network to perform this task.

# Cart Pole Environment

For the cart pole environment, we mostly need three things:  
    - A renderer to display the simulation  
    - The physics system and  
    - An input to be able to nudge the pole in both directions  

For that, we will need the following packages:

In [None]:
import pygame as pg
from typing import Tuple
import numpy as np

Let's start with the renderer...

In [2]:
#Renders the scene. IMPORTANT: Because ipycanvas uses the html canvas coordinates, the y-axis is inverted.
class Renderer():
    def __init__(self, width: int, height: int, origin_x: int = 0, origin_y: int = 0, SCALE: int = 1) -> None:
        self.width = width
        self.height = height
        self.origin = (origin_x, origin_y)
        self.SCALE = SCALE #1m = SCALE pixels

        pg.display.init()
        pg.display.set_caption("Pole Balancing Simulator")
        pg.font.init()
        self.screen = pg.display.set_mode((width, height))
    
    #Translates global coordinates into screen coordinates
    def translate(self, x: int, y: int) -> Tuple[int, int]:
        return (x+self.origin[0], -y+self.origin[1])
    
    #Draws ground. offset is there to shift the ground below the car
    def draw_ground(self, offset: int, color) -> None:
        ground = pg.Rect(self.translate(-self.width//2, -offset * self.SCALE), (self.width, self.height-self.origin[1]-offset * self.SCALE))
        pg.draw.rect(self.screen, color, ground)

    #Draws car. pos_y is omitted because the car's center should be at y = 0
    def draw_car(self, pos_x: float, car_color = "blue", wheel_color = "black") -> None:
        pos_x *= self.SCALE
        #values, hard-coded for now, in meters
        width = 0.5 * self.SCALE
        height = 0.25 * self.SCALE
        wheel_radius = 0.1 * self.SCALE

        car_body = pg.Rect(self.translate(pos_x - width/2, height/2), (width, height))
        pg.draw.rect(self.screen, car_color, car_body)
        pg.draw.circle(self.screen, wheel_color, 
                           self.translate(pos_x - width/2 + wheel_radius, -height/2), wheel_radius)
        pg.draw.circle(self.screen, wheel_color, 
                           self.translate(pos_x + width/2 - wheel_radius, -height/2), wheel_radius)

    #Draws the pole
    def draw_pole(self, pos_x: float, theta: float, length: float, width: float = 0.1, color = "red") -> None:
        pos_x *= self.SCALE
        width = int(width * self.SCALE)
        pole_end_x = length * np.sin(theta) * self.SCALE + pos_x
        pole_end_y = length * np.cos(theta) * self.SCALE
        pg.draw.line(self.screen, color, self.translate(pos_x, 0), self.translate(pole_end_x, pole_end_y), width)

    #Clears the entire canvas
    def draw_clear(self) -> None:
        self.screen.fill("white")

    #Draws physical values
    def draw_stats(self, theta: float, dw: float, a: float, x: float, episode: int) -> None:
        font = pg.font.Font(None, 24)
        text = font.render(str(theta)[:4] + " | " + str(dw)[:4] + " | " + str(x)[:4] + " | " + str(a)[:4] + " | episode: " + str(episode), True, (10,10,10))
        textpos = text.get_rect(centerx=self.screen.get_width() / 2, y=10)
        self.screen.blit(text, textpos)

    #Get the 
    def get_relative_mouse_x(self, mouse_x:float) -> float:
        return (mouse_x-self.origin[0])/self.SCALE
    
    def display(self) -> None:
        pg.display.flip()

## Physics Updates

For the physics, we use the corrected version of of the original problem derived from V. Florian (CITATION NEEDED), but omit the friction forces.
The situation is sketched here:  

![alt text](cartpole_illustration.png "Cartpole")

We apply Newton's second law of motion to the cart:  
$$
\begin{aligned}
    \mathbf{F} + \mathbf{G}_c - \mathbf{N} = m_c \cdot \mathbf{a}_c
\end{aligned}
$$
Where:  

$\mathbf{F} = F \cdot \mathbf{u_x}$ is the control force acting on the cart,  
$\mathbf{G}_c = m_c \cdot g \cdot \mathbf{u}_y$ is the gravitational component acting on the cart,  
$\mathbf{N} = N_x \cdot \mathbf{u}_x - N_y \cdot \mathbf{u}_y$ is the negative reaction force that the pole is applying on the cart,  
$\mathbf{a}_c = \ddot{x} \cdot \mathbf{u}_x$ is the accelaration of the cart,  
$m_c$ is the cart's mass and  
$\mathbf{u}_x$, $\mathbf{u}_y$, $\mathbf{u}_z$ are the unit vectors of the frame of reference given in the illustration.

We can decompose this equation now into the $x$ and $y$ component:
$$
\begin{aligned}
    F - N_x = m_c \cdot \ddot{x}
\end{aligned}
$$
$$
\begin{aligned}
    m_c \cdot g + N_y = 0
\end{aligned}
$$

Newton's second law of motion applied to the pole gives us:
$$
\begin{aligned}
    \mathbf{N} + \mathbf{G}_p = m_p \cdot \mathbf{a}_p
\end{aligned}
$$

Where $\mathbf{G}_p = m_p \cdot g \cdot \mathbf{u}_y$.

The accelaration $\mathbf{a}_p$ of the pole's center of mass consists of three components, where $\mathbf{r}_p = l \cdot (\sin{\theta}\cdot \mathbf{u}_x-\cos{\theta}\cdot \mathbf{u}_y)$ denotes the vector pointing to the pole's center of mass relative to it's rotation center:  
1. The accelaration of the cart it is attached to $\mathbf{a}_c$,
2. The pole's angular accelaration $\mathbf{\epsilon} = \ddot{\theta} \cdot \mathbf{u}_z$, which is translated into accelaration by $\mathbf{\epsilon} \times \mathbf{r}_p$.
3. The pole's angular velocity $\mathbf{\omega} = \dot{\theta} \cdot \mathbf{u}_z$, for which the accelaration can be derived by  $\mathbf{\omega} \times (\mathbf{\omega} \times \mathbf{r}_p)$.

Thus we obtain:
$$
\begin{aligned}
    \mathbf{a}_p  = \mathbf{a}_c + \mathbf{\epsilon} \times \mathbf{r}_p + \mathbf{\omega} \times (\mathbf{\omega} \times \mathbf{r}_p)
\end{aligned}
$$
Substituting $\mathbf{r}_p = l \cdot (\sin{\theta}\cdot \mathbf{u}_x-\cos{\theta}\cdot \mathbf{u}_y)$ and $\mathbf{a}_p = \ddot{x} \cdot \mathbf{u}_x$ as well as $\mathbf{u}_z \times \mathbf{u}_x = \mathbf{u}_y$ and $\mathbf{u}_z \times \mathbf{u}_y = -\mathbf{u}_x$:
\begin{aligned}
    \mathbf{a}_p  = \ddot{x} \cdot \mathbf{u}_x + l \cdot \ddot{\theta} \cdot (\sin{\theta}\cdot \mathbf{u}_y + \cos{\theta}\cdot \mathbf{u}_x) - l \cdot \dot{\theta}^2 \cdot (\sin{\theta}\cdot \mathbf{u}_x - \cos{\theta}\cdot \mathbf{u}_y)
\end{aligned}

Inserting this quation into our equation for the forces of the pole and decomposing on the $x$ and $y$ axis we obtain:
$$
\begin{aligned}
    N_x = m_p \cdot (\ddot{x} + l \cdot \ddot{\theta} \cdot \cos{\theta} - l \cdot \dot{\theta}^2 \cdot \sin{\theta})
\end{aligned}
$$
$$
\begin{aligned}
    m_p \cdot g - N_y = m_p \cdot (l \cdot \ddot{\theta} \cdot \sin{\theta} + l \cdot \dot{\theta}^2 \cdot \cos{\theta})
\end{aligned}
$$

# TODO: FINISH EQUATION DERIVATION (SOLVE EQUATION REFERENCING?)

In [3]:
class Physics():
    
    def __init__(self, x, theta, v = 0, a = 0, w = 0, dw = 0, g = 9.81, m_c = 1, m_p = 0.1, l = 0.5, dt = 0.02) -> None:
        self.__dict__.update(vars())

    def dw_step(self, cart_force, nudge_force) -> float:
        numerator = self.g * np.sin(self.theta) + np.cos(self.theta) * (-cart_force - self.m_p * self.l * self.w**2 * np.sin(self.theta))/(self.m_c+self.m_p) + nudge_force * np.cos(self.theta)/(self.m_p*self.l)
        denominator = self.l * (4/3 - (self.m_p*np.cos(self.theta)**2)/(self.m_c+self.m_p))

        self.dw = numerator/denominator
        self.w += self.dt * self.dw
        self.theta += self.dt * self.w

        return self.theta
    
    def a_step(self, force) -> float:
        numerator = force + self.m_p * self.l * (self.w**2 * np.sin(self.theta) - self.dw * np.cos(self.theta))
        denominator = self.m_c + self.m_p

        self.a = numerator/denominator
        self.v += self.dt * self.a
        self.x += self.dt * self.v

        return self.x

    def update(self, force, mouse_x) -> Tuple[float, float]:
        nudge_force = 0
        if mouse_x is not None:
            nudge_force = -1 if mouse_x > self.x else 1
        return (self.dw_step(force, nudge_force), self.a_step(force))
    
    #get state of the system that agent can see
    def get_state(self) -> Tuple[float,float,float,float]:
        return (self.x, self.theta, self.v, self.w)
    
    def reset(self) -> None:
        self.x = 0
        self.theta = (np.random.rand() - 1) / 10
        self.v = 0
        self.a = 0
        self.w = 0
        self.dw = 0


# The Agent (BOXES)

In [4]:

class Agent:
    def __init__(self, initial_state: Tuple[float,float,float,float]) -> None:

        #thresholds for discretizing the state space
        self.x_thresholds = np.array([-2.4, -0.8, 0.8, 2.4])
        self.theta_thresholds = np.array([-12, -6, -1, 0, 1, 6, 12])
        self.theta_thresholds = self.theta_thresholds /180 * np.pi
        self.v_thresholds = np.array([float("-inf"), -0.5, 0.5, float("+inf")]) #open intervals ignored here
        self.w_thresholds = np.array([float("-inf"), -50, 50, float("+inf")]) #open intervals ignored here
        self.w_thresholds = self.w_thresholds /180 * np.pi

        self.dimensions = (len(self.x_thresholds) - 1, len(self.theta_thresholds) - 1, len(self.v_thresholds) - 1, len(self.w_thresholds) - 1)

        self.boxes = np.random.rand(self.dimensions[0], 
                                    self.dimensions[1], 
                                    self.dimensions[2], 
                                    self.dimensions[3], 
                                    2) #one q-value for left and right respectively
        box = self.get_box(initial_state)
        self.current_box = self.boxes[box[0], box[1], box[2], box[3], :]

        self.episode = 1
    
    def discretize(self, value, thresholds):
        for i, limit in enumerate(thresholds):
            if value < limit:
                return i - 1
        return -1

    def get_box(self, state: Tuple[float,float,float,float]) -> Tuple[int,int,int,int]:
        return (self.discretize(state[0], self.x_thresholds),
                 self.discretize(state[1], self.theta_thresholds),
                 self.discretize(state[2], self.v_thresholds), 
                 self.discretize(state[3], self.w_thresholds))
    
    def get_episode(self) -> int:
        return self.episode
    
    
    def failure_reset(self, state: Tuple[float,float,float,float]):
        box = self.get_box(state)
        self.current_box = self.boxes[box[0], box[1], box[2], box[3], :]
        self.episode += 1


class NonSpikingAgent(Agent):
    def __init__(self, initial_state: Tuple[float,float,float,float], learning_rate, learning_decay, epsilon, epsilon_decay, discount_factor) -> None:
        super().__init__(initial_state)

        #learning paramters
        self.learning_rate = learning_rate
        self. learning_decay = learning_decay
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.discount_factor = discount_factor

    #returns 0 if the action is "left", else "1"
    def choose_action(self) -> int:
        self.action = np.random.choice([np.argmax(self.current_box), np.argmin(self.current_box)], p=[1-self.epsilon, self.epsilon])
        return self.action
    
    #returns 0 if no failure occured, else 1
    #reward is -1 on failure and 0 else
    def update(self, next_state: Tuple[float,float,float,float]) -> int:
        box = self.get_box(next_state)
        if -1 in box:
            self.current_box[self.action] += self.learning_rate * -1
            return 1
        
        next_box = self.boxes[box[0], box[1], box[2], box[3], :]
        next_q = np.max(next_box)
        self.current_box[self.action] += self.learning_rate * (self.discount_factor * (next_q - self.current_box[self.action]))

        self.current_box = next_box
        self.epsilon *= self.epsilon_decay
        self.learning_rate *= self.learning_decay

        return 0
    

# Executing Non-Spiking-Agent

In [None]:
import sys
import matplotlib.pyplot as plt

r = Renderer(1200, 800, 600, 500, 400)
clock = pg.time.Clock()
running = True

p = Physics(0, (np.random.rand() - 1) / 10)

a = NonSpikingAgent(p.get_state(), 0.5, 0.9999, 1, 0.995, 0.99)

plt.ion()  # turning interactive mode on
# preparing the data
y_plot = [0]
x_plot = [0]

# plotting the first frame
graph = plt.plot(x_plot,y_plot)[0]
plt.pause(1)

steps_per_episode = 0
max_steps = 0

while running:
    steps_per_episode += 1

    force = 0
    mouse_x = None

    # poll for events
    for event in pg.event.get():
        if event.type == pg.QUIT:
            running = False
            pg.quit()
            sys.exit()
            quit()
        elif event.type == pg.MOUSEBUTTONDOWN:
            mouse_x = r.get_relative_mouse_x(pg.mouse.get_pos()[0])

    # agent chooses action, simulation is uodated and reward is calculated
    force = 10 if a.choose_action() else -10
    theta, x = p.update(force, mouse_x)
    failure = a.update(p.get_state())

    if failure:
        p.reset()
        a.failure_reset(p.get_state())

        if steps_per_episode > max_steps:
            max_steps = steps_per_episode
        y_plot.append(steps_per_episode)
        x_plot.append(a.get_episode())
        
        # removing the older graph
        graph.remove()
        
        # plotting newer graph
        graph = plt.plot(x_plot,y_plot,color = 'g')[0]
        plt.xlim(x_plot[0], x_plot[-1])
        plt.ylim(0, max_steps)
        # calling pause function to let it draw the graoh in between episodes
        plt.pause(0.0001)

        steps_per_episode = 0
    
    
    if a.get_episode() > 1000:
        r.draw_clear()
        r.draw_ground(0.2, "grey")
        r.draw_car(x)
        r.draw_pole(x, theta, 2*p.l, 0.02)
        r.draw_stats(theta*180/np.pi, p.w*180/np.pi, x, p.a, a.get_episode())
        r.display()

        clock.tick(50)  # limits FPS to 50


# TODO: clean up code, derive equations and explain renderer briefly

# Spiking version

In [1]:
# ... generate NESTML model code...

from pynestml.codegeneration.nest_code_generator_utils import NESTCodeGeneratorUtils

# generate and build code
input_layer_module_name, input_layer_neuron_model_name = \
   NESTCodeGeneratorUtils.generate_code_for("../../../models/neurons/ignore_and_fire_neuron.nestml")

# ignore_and_fire
output_layer_module_name, output_layer_neuron_model_name, output_layer_synapse_model_name = \
    NESTCodeGeneratorUtils.generate_code_for("../../../models/neurons/iaf_psc_exp_neuron.nestml",
                                             "../../../models/synapses/neuromodulated_stdp_synapse.nestml",
                                             post_ports=["post_spikes"],
                                             mod_ports=["mod_spikes"],
                                             logging_level="DEBUG",
                                             codegen_opts={"delay_variable": {"neuromodulated_stdp_synapse": "d"},
                                                           "weight_variable": {"neuromodulated_stdp_synapse": "w"}})



              -- N E S T --
  Copyright (C) 2004 The NEST Initiative

 Version: 3.8.0-post0.dev0
 Built: Dec 10 2024 12:04:47

 This program is provided AS IS and comes with
 NO WARRANTY. See the file LICENSE for details.

 Problems or suggestions?
   Visit https://www.nest-simulator.org

 Type 'nest.help()' to find out more about NEST.


              -- N E S T --
  Copyright (C) 2004 The NEST Initiative

 Version: 3.8.0-post0.dev0
 Built: Dec 10 2024 12:04:47

 This program is provided AS IS and comes with
 NO WARRANTY. See the file LICENSE for details.

 Problems or suggestions?
   Visit https://www.nest-simulator.org

 Type 'nest.help()' to find out more about NEST.

  cmake_minimum_required() should be called prior to this top-level project()
  call.  Please see the cmake-commands(7) manual for usage documentation of
  both commands.

-- The CXX compiler identification is GNU 12.3.0
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working

[12,GLOBAL, INFO]: Start processing '/home/charl/julich/nestml-fork-AlexisWis-cart_pole_tutorial/nestml/doc/tutorials/cart_pole_reinforcement_learning/../../../models/synapses/neuromodulated_stdp_synapse.nestml'!
[13,neuromodulated_stdp_synapse_nestml, INFO, [62:13;62:18]]: Implicit casting from (compatible) type '1 / ms' to 'real'.
[14,neuromodulated_stdp_synapse_nestml, INFO, [83:13;84:59]]: Implicit casting from (compatible) type 'ms' to 'real'.
[15,iaf_psc_exp_neuron_nestml, INFO, [66:39;66:63]]: Implicit magnitude conversion from pA to pA buffer with factor 1.0 
[16,iaf_psc_exp_neuron_nestml, INFO, [66:15;66:30]]: Implicit magnitude conversion from mV / ms to pA / pF with factor 1.0 
[18,neuromodulated_stdp_synapse_nestml, INFO, [62:13;62:18]]: Implicit casting from (compatible) type '1 / ms' to 'real'.
[19,neuromodulated_stdp_synapse_nestml, INFO, [83:13;84:59]]: Implicit casting from (compatible) type 'ms' to 'real'.
[20,GLOBAL, INFO]: State variables that will be moved from syn

INFO:Analysing input:
INFO:{
    "dynamics": [
        {
            "expression": "I_syn_exc' = (-I_syn_exc) / tau_syn_exc",
            "initial_values": {
                "I_syn_exc": "0"
            }
        },
        {
            "expression": "I_syn_inh' = (-I_syn_inh) / tau_syn_inh",
            "initial_values": {
                "I_syn_inh": "0"
            }
        },
        {
            "expression": "V_m' = (-(V_m - E_L)) / tau_m + (I_syn_exc - I_syn_inh + I_e + I_stim) / C_m",
            "initial_values": {
                "V_m": "E_L"
            }
        },
        {
            "expression": "refr_t' = (-1000.0) * 1.0 / 1000.0",
            "initial_values": {
                "refr_t": "0"
            }
        }
    ],
    "options": {
        "output_timestep_symbol": "__h"
    },
    "parameters": {
        "C_m": "250",
        "E_L": "(-70)",
        "I_e": "0",
        "V_reset": "(-70)",
        "V_th": "(-55)",
        "refr_T": "2",
        "tau_m": "10

[33,GLOBAL, INFO]: Successfully constructed neuron-synapse pair iaf_psc_exp_neuron_nestml__with_neuromodulated_stdp_synapse_nestml, neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml
[34,GLOBAL, INFO]: Analysing/transforming model 'iaf_psc_exp_neuron_nestml'
[35,iaf_psc_exp_neuron_nestml, INFO, [55:0;108:0]]: Starts processing of the model 'iaf_psc_exp_neuron_nestml'


DEBUG:Created Shape with symbol I_syn_exc, derivative_factors = [-1/tau_syn_exc], inhom_term = 0.0, nonlin_term = 0
INFO:	Returning shape: Shape "I_syn_exc" of order 1
INFO:
Processing differential-equation form shape I_syn_inh with defining expression = "(-I_syn_inh) / tau_syn_inh"
DEBUG:Splitting expression -I_syn_inh/tau_syn_inh (symbols [I_syn_exc, I_syn_inh, V_m, refr_t, I_syn_exc, I_syn_inh])
DEBUG:	linear factors: Matrix([[0], [-1/tau_syn_inh], [0], [0], [0], [0]])
DEBUG:	inhomogeneous term: 0.0
DEBUG:	nonlinear term: 0.0
DEBUG:Created Shape with symbol I_syn_inh, derivative_factors = [-1/tau_syn_inh], inhom_term = 0.0, nonlin_term = 0
INFO:	Returning shape: Shape "I_syn_inh" of order 1
INFO:
Processing differential-equation form shape V_m with defining expression = "(-(V_m - E_L)) / tau_m + (I_syn_exc - I_syn_inh + I_e + I_stim) / C_m"
DEBUG:Splitting expression (E_L - V_m)/tau_m + (I_e + I_stim + I_syn_exc - I_syn_inh)/C_m (symbols [I_syn_exc, I_syn_inh, V_m, refr_t, I_syn_exc

INFO:Analysing input:
INFO:{
    "dynamics": [
        {
            "expression": "I_syn_exc' = (-I_syn_exc) / tau_syn_exc",
            "initial_values": {
                "I_syn_exc": "0"
            }
        },
        {
            "expression": "I_syn_inh' = (-I_syn_inh) / tau_syn_inh",
            "initial_values": {
                "I_syn_inh": "0"
            }
        },
        {
            "expression": "V_m' = (-(V_m - E_L)) / tau_m + (I_syn_exc - I_syn_inh + I_e + I_stim) / C_m",
            "initial_values": {
                "V_m": "E_L"
            }
        },
        {
            "expression": "refr_t' = (-1000.0) * 1.0 / 1000.0",
            "initial_values": {
                "refr_t": "0"
            }
        },
        {
            "expression": "post_tr__for_neuromodulated_stdp_synapse_nestml' = (-post_tr__for_neuromodulated_stdp_synapse_nestml) / tau_tr_post__for_neuromodulated_stdp_synapse_nestml",
            "initial_values": {
                "post_tr_

[36,GLOBAL, INFO]: Analysing/transforming model 'iaf_psc_exp_neuron_nestml__with_neuromodulated_stdp_synapse_nestml'
[37,iaf_psc_exp_neuron_nestml__with_neuromodulated_stdp_synapse_nestml, INFO, [55:0;108:0]]: Starts processing of the model 'iaf_psc_exp_neuron_nestml__with_neuromodulated_stdp_synapse_nestml'


DEBUG:Created Shape with symbol I_syn_inh, derivative_factors = [-1/tau_syn_inh], inhom_term = 0.0, nonlin_term = 0
INFO:	Returning shape: Shape "I_syn_inh" of order 1
INFO:
Processing differential-equation form shape V_m with defining expression = "(-(V_m - E_L)) / tau_m + (I_syn_exc - I_syn_inh + I_e + I_stim) / C_m"
DEBUG:Splitting expression (E_L - V_m)/tau_m + (I_e + I_stim + I_syn_exc - I_syn_inh)/C_m (symbols [I_syn_exc, I_syn_inh, V_m, refr_t, post_tr__for_neuromodulated_stdp_synapse_nestml, I_syn_exc, I_syn_inh, V_m])
DEBUG:	linear factors: Matrix([[1/C_m], [-1/C_m], [-1/tau_m], [0], [0], [0], [0], [0]])
DEBUG:	inhomogeneous term: E_L/tau_m + I_e/C_m + I_stim/C_m
DEBUG:	nonlinear term: 0.0
DEBUG:Created Shape with symbol V_m, derivative_factors = [-1/tau_m], inhom_term = E_L/tau_m + I_e/C_m + I_stim/C_m, nonlin_term = I_syn_exc/C_m - I_syn_inh/C_m
INFO:	Returning shape: Shape "V_m" of order 1
INFO:
Processing differential-equation form shape refr_t with defining expression = "

DEBUG:Initializing system of shapes with x = Matrix([[I_syn_exc], [I_syn_inh], [V_m], [refr_t], [post_tr__for_neuromodulated_stdp_synapse_nestml]]), A = Matrix([[-1/tau_syn_exc, 0, 0, 0, 0], [0, -1/tau_syn_inh, 0, 0, 0], [1/C_m, -1/C_m, -1/tau_m, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, -1/tau_tr_post__for_neuromodulated_stdp_synapse_nestml]]), b = Matrix([[0], [0], [E_L/tau_m + I_e/C_m + I_stim/C_m], [-1.00000000000000], [0]]), c = Matrix([[0], [0], [0], [0], [0]])
DEBUG:System of equations:
DEBUG:x = Matrix([[I_syn_exc], [I_syn_inh], [V_m], [refr_t], [post_tr__for_neuromodulated_stdp_synapse_nestml]])
DEBUG:A = Matrix([
[-1/tau_syn_exc,              0,        0, 0,                                                      0],
[             0, -1/tau_syn_inh,        0, 0,                                                      0],
[         1/C_m,         -1/C_m, -1/tau_m, 0,                                                      0],
[             0,              0,        0, 0,                    

[38,GLOBAL, INFO]: Analysing/transforming synapse neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml.
[39,neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml, INFO, [26:0;88:0]]: Starts processing of the model 'neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml'


INFO:Saving dependency graph plot to /tmp/ode_dependency_graph_analytically_solvable.dot
DEBUG:os.makedirs('/tmp')
DEBUG:write lines to '/tmp/ode_dependency_graph_analytically_solvable.dot'
DEBUG:run [PosixPath('dot'), '-Kdot', '-Tpdf', '-O', 'ode_dependency_graph_analytically_solvable.dot']
INFO:Generating propagators for the following symbols: pre_tr
DEBUG:Initializing system of shapes with x = Matrix([[pre_tr]]), A = Matrix([[-1/tau_tr_pre]]), b = Matrix([[0]]), c = Matrix([[0]])
DEBUG:System of equations:
DEBUG:x = Matrix([[pre_tr]])
DEBUG:A = Matrix([[-1/tau_tr_pre]])
DEBUG:b = Matrix([[0]])
DEBUG:c = Matrix([[0]])
INFO:update_expr[pre_tr] = __P__pre_tr__pre_tr*pre_tr
INFO:In ode-toolbox: returning outdict = 
INFO:[
    {
        "initial_values": {
            "pre_tr": "0.0"
        },
        "parameters": {
            "tau_tr_pre": "20.0000000000000"
        },
        "propagators": {
            "__P__pre_tr__pre_tr": "exp(-__h/tau_tr_pre)"
        },
        "solver": "ana

[41,neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml, INFO, [62:13;62:18]]: Implicit casting from (compatible) type '1 / ms' to 'real'.
[42,neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml, INFO, [83:13;84:59]]: Implicit casting from (compatible) type 'ms' to 'real'.
[44,neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml, INFO, [62:13;62:18]]: Implicit casting from (compatible) type '1 / ms' to 'real'.
[45,neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml, INFO, [83:13;84:59]]: Implicit casting from (compatible) type 'ms' to 'real'.
[46,GLOBAL, INFO]: Rendering template /home/charl/julich/nestml-fork-AlexisWis-cart_pole_tutorial/nestml/doc/tutorials/cart_pole_reinforcement_learning/target/iaf_psc_exp_neuron_nestml.cpp
[47,GLOBAL, INFO]: Rendering template /home/charl/julich/nestml-fork-AlexisWis-cart_pole_tutorial/nestml/doc/tutorials/cart_pole_reinforcement_learning/target/iaf_psc_exp_neuron_nestml.h
[48,iaf_psc_exp_

In file included from /home/charl/julich/nestml-fork-AlexisWis-cart_pole_tutorial/nestml/doc/tutorials/cart_pole_reinforcement_learning/target/nestml_17cabc36be1d44369f57ca5fb6008595_module.cpp:36:
/home/charl/julich/nestml-fork-AlexisWis-cart_pole_tutorial/nestml/doc/tutorials/cart_pole_reinforcement_learning/target/neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml.h: In instantiation of ‘nest::neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml<targetidentifierT>::neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml() [with targetidentifierT = nest::TargetIdentifierPtrRport]’:
/home/charl/julich/nest-simulator-install/include/nest/connector_model.h:164:25:   required from ‘nest::GenericConnectorModel<ConnectionT>::GenericConnectorModel(std::string) [with ConnectionT = nest::neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml<nest::TargetIdentifierPtrRport>; std::string = std::__cxx11::basic_string<char>]’
/home/charl/julich

/home/charl/julich/nestml-fork-AlexisWis-cart_pole_tutorial/nestml/doc/tutorials/cart_pole_reinforcement_learning/target/neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml.h: In instantiation of ‘bool nest::neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml<targetidentifierT>::send(nest::Event&, size_t, const nest::neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestmlCommonSynapseProperties&) [with targetidentifierT = nest::TargetIdentifierPtrRport; size_t = long unsigned int]’:
/home/charl/julich/nest-simulator-install/include/nest/connector_base.h:391:22:   required from ‘void nest::Connector<ConnectionT>::send_to_all(size_t, const std::vector<nest::ConnectorModel*>&, nest::Event&) [with ConnectionT = nest::neuromodulated_stdp_synapse_nestml__with_iaf_psc_exp_neuron_nestml<nest::TargetIdentifierPtrRport>; size_t = long unsigned int]’
/home/charl/julich/nest-simulator-install/include/nest/connector_base.h:383:3:   required from here
  605 |

[100%] Linking CXX shared module nestml_17cabc36be1d44369f57ca5fb6008595_module.so
[100%] Built target nestml_17cabc36be1d44369f57ca5fb6008595_module_module
[100%] Built target nestml_17cabc36be1d44369f57ca5fb6008595_module_module
Install the project...
-- Install configuration: ""
-- Installing: /tmp/nestml_target_lrqc1inz/nestml_17cabc36be1d44369f57ca5fb6008595_module.so


In [2]:
import nest


class SpikingAgent(Agent):
    def __init__(self, initial_state: Tuple[float,float,float,float]) -> None:
        super().__init__(initial_state)

        # ...
    
    def get_state_neuron(self, state) -> int:
        idx = 0
        thresholds = [self.x_thresholds, self.theta_thresholds, self.v_thresholds, self.w_thresholds]
        for dim, val, thresh in zip(self.dimensions, state, thresholds):
            i = self.discretize(val,thresh)
            if i == -1: return -1
            idx = idx * dim + i

        return idx
    
    def construct_neural_network(self):
        nest.ResetKernel()
        nest.Install(input_layer_module_name)   # makes the generated NESTML model available
        nest.Install(output_layer_module_name)   # makes the generated NESTML model available
        """
        #PROBLEM: NEST likes to use two NodeCollection objects
        #perhaps just use an equation to derive the correct index?
        self.input_population = np.empty((len(self.x_thresholds) - 1, 
                                          len(self.theta_thresholds) - 1, 
                                          len(self.v_thresholds) - 1, 
                                          len(self.w_thresholds) - 1))
        
        for idx, _ in np.ndenumerate(self.input_population):
            self.input_population[idx[0], idx[1], idx[2], idx[3]] = nest.Create(neuron_model_name)
        """
        input_size = self.dimensions[0] * self.dimensions[1] * self.dimensions[2] * self.dimensions[3]
        self.input_population = nest.Create(input_layer_neuron_model_name, input_size)
        self.output_population = nest.Create(output_layer_neuron_model_name, 2) #2? 10?

        

    def update(self, next_state: Tuple[float,float,float,float]):
        box = self.get_box(next_state)  
        
        # set input current on the neuron (I_e) to make it fire (at firing rate = ???)
        self.input_population[box[0], box[1], box[2], box[3]].I_e = ???    # a current to make the neuron fire at a "reasonable" rate (like 10 Hz)



SyntaxError: invalid syntax (1176944502.py, line 45)

# Executing spiking version

In [None]:
import sys
import matplotlib.pyplot as plt

r = Renderer(1200, 800, 600, 500, 400)
clock = pg.time.Clock()
running = True

p = Physics(0, (np.random.rand() - 1) / 10)

a = SpikingAgent(p.get_state(), 0.5, 0.9999, 1, 0.995, 0.99)

plt.ion()  # turning interactive mode on
# preparing the data
y_plot = [0]
x_plot = [0]

# plotting the first frame
graph = plt.plot(x_plot,y_plot)[0]
plt.pause(1)

steps_per_episode = 0
max_steps = 0

while running:
    steps_per_episode += 1

    force = 0
    mouse_x = None

    # poll for events
    for event in pg.event.get():
        if event.type == pg.QUIT:
            running = False
            pg.quit()
            sys.exit()
            quit()
        elif event.type == pg.MOUSEBUTTONDOWN:
            mouse_x = r.get_relative_mouse_x(pg.mouse.get_pos()[0])

    # agent chooses action, simulation is uodated and reward is calculated
    force = 10 if a.choose_action() else -10
    theta, x = p.update(force, mouse_x)
    failure = a.update(p.get_state())

    if failure:
        p.reset()
        a.failure_reset(p.get_state())

        if steps_per_episode > max_steps:
            max_steps = steps_per_episode
        y_plot.append(steps_per_episode)
        x_plot.append(a.get_episode())
        
        # removing the older graph
        graph.remove()
        
        # plotting newer graph
        graph = plt.plot(x_plot,y_plot,color = 'g')[0]
        plt.xlim(x_plot[0], x_plot[-1])
        plt.ylim(0, max_steps)
        # calling pause function to let it draw the graoh in between episodes
        plt.pause(0.0001)

        steps_per_episode = 0
    
    
    if a.get_episode() > 1000:
        r.draw_clear()
        r.draw_ground(0.2, "grey")
        r.draw_car(x)
        r.draw_pole(x, theta, 2*p.l, 0.02)
        r.draw_stats(theta*180/np.pi, p.w*180/np.pi, x, p.a, a.get_episode())
        r.display()

        clock.tick(50)  # limits FPS to 50
