In [1]:
import numpy as np
from ddqla.agents import BaseAgent
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers.legacy import Adam
from keras.losses import MeanSquaredError


class Imex(BaseAgent):
    __ACTIONS = 4
    __STARTUP_ENVIRONMENT = np.asarray([7, 4, 1, 4], dtype=np.float32)
    __MATRIX_SIDE_DIM = 9
    __DENSE_DIM = 100

    def __init__(self):
        super().__init__(
            Imex.__ACTIONS,
            Imex.__STARTUP_ENVIRONMENT,
            fit_each_n_steps=7,
            cumulative_rewards_max_length=70
        )

    def _get_model(self, state_features):
        inputs = Input(shape=(state_features,))
        dense = Dense(Imex.__DENSE_DIM, activation='swish')(inputs)
        dense = Dense(Imex.__DENSE_DIM, activation='swish')(dense)
        outputs = Dense(Imex.__ACTIONS, activation='linear')(dense)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer=Adam(), loss=MeanSquaredError())
        #model.summary()
        return model

    def reset_state(self):
        for i in range(0, len(self._state)):
            self._state[i] = Imex.__STARTUP_ENVIRONMENT[i]

    def _get_reward(self, action, environment):
        self.environment_log.append(environment)
        reward = -1
        new_pos = np.asarray([self._state[0], self._state[1]])
        match action:
            case 0:  # up
                new_pos[1] -= 1
            case 1:  # right
                new_pos[0] += 1
            case 2:  # down
                new_pos[1] += 1
            case 3:  # left
                new_pos[0] -= 1
        if 0 <= new_pos[0] < Imex.__MATRIX_SIDE_DIM and 0 <= new_pos[1] < Imex.__MATRIX_SIDE_DIM:
            environment[0] = new_pos[0]
            environment[1] = new_pos[1]
            self._state[0] = new_pos[0]
            self._state[1] = new_pos[1]
        if new_pos[0] == self._state[2] and new_pos[1] == self._state[3]:
            reward = 6
            self.reset_state()
        return reward


In [None]:
# Create the agent; Imex.__init__ takes no arguments and forwards its own
# class-level settings to BaseAgent.
imex = Imex()

In [None]:
# Drive the agent step by step and print a progress line at regular intervals.
TOTAL_STEPS = 15000
REPORT_EVERY = 100
# Argument forwarded to imex.test(); presumably the length of the test run --
# confirm against the ddqla documentation.
TEST_LENGTH = 70

for step in range(TOTAL_STEPS):
    imex.step()
    # Skip reporting unless this is a reporting step and the agent says its memory is ready.
    if step % REPORT_EVERY != 0 or not imex.is_memory_ready():
        continue
    rewards = imex.test(TEST_LENGTH)
    cum_rewards = imex.get_last_cumulative_rewards()
    print('#', step, '  CR: ', np.sum(cum_rewards), '  R: ', rewards)

In [None]:
# Call the agent's summary() method; it is not defined in this notebook
# (presumably inherited from BaseAgent -- confirm what it reports).
imex.summary()