In [33]:
import importlib
import environment 
importlib.reload(environment)

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from collections import deque
from tqdm import tqdm

In [7]:
initial_setting = {
    'agents': np.array([[5, 5], [10, 10], [15, 15]], dtype=float),
    'blocks': np.array([[10, 5], [4, 16], [14, 6]], dtype=float),
    'colors': np.array([environment.RED, environment.RED, environment.BLUE], dtype=int)
}
env = environment.Environment(objective = [(environment.RED, environment.TOP_EDGE), (environment.BLUE, environment.RIGHT_EDGE)],
                   size = environment.SIMULATION_ARENA_SIZE, 
                   n_agents = 3, 
                   n_blocks = 3,
                   n_neighbors = 3,
                   sensor_range = environment.SIMULATION_SENSOR_RANGE,
                   sensor_angle = 360,
                   max_distance_covered_per_step = environment.SIMULATION_MAX_DISTANCE,
                   sensitivity = 0.5,
                   initial_setting = initial_setting)
initial_state, _ = env.reset()
env.print_env()
initial_state

. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . [91mO[0m . . . .
. . . . . . [0m0[0m . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [91mO[0m . . . . . [0m1[0m . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . [94mO[0m . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . [0m2[0m . . . . . .

[{'neighbors': array([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]),
  'carrying': -1},
 {'neighbors': array([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]),
  'carrying': -1},
 {'neighbors': array([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]),
  'carrying': -1}]

Discretize action. pick up, put down, move S, SE, E, NE, N, NW, W, SW. 10 actions in total.

In [21]:
PICK_UP = 0
DROP = 1
S = 2
SE = 3
E = 4
NE = 5
N = 6
NW = 7  
W = 8
SW = 9
ACTIONS = [PICK_UP, DROP, S, SE, E, NE, N, NW, W, SW]

def discretize_actions(actions):
    # Translate discretize action to environment action
    act_values = []
    for a in actions:
        if a == PICK_UP:
            act_values.append({
                "action": environment.PICK_UP,
                "move": [0, 0]
            })
        elif a == DROP:
            act_values.append({
                "action": environment.PUT_DOWN,
                "move": [0, 0]
            })
        elif a == S:
            act_values.append({
                "action": environment.MOVE,
                "move": [1, 0]
            })
        elif a == SE:
            act_values.append({
                "action": environment.MOVE,
                "move": [1, 45]
            })
        elif a == E:
            act_values.append({
                "action": environment.MOVE,
                "move": [1, 90]
            })
        elif a == NE:
            act_values.append({
                "action": environment.MOVE,
                "move": [1, 135]
            })
        elif a == N:
            act_values.append({
                "action": environment.MOVE,
                "move": [1, 180]
            })
        elif a == NW:
            act_values.append({
                "action": environment.MOVE,
                "move": [1, 225]
            })
        elif a == W:
            act_values.append({
                "action": environment.MOVE,
                "move": [1, 270]
            })
        elif a == SW:
            act_values.append({
                "action": environment.MOVE,
                "move": [1, 315]
            }) 
    return act_values

state_size = len(env.process_observation(initial_state)[0])
action_size = len(ACTIONS)
state_size, action_size

(44, 10)

In [64]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(optimizer=Adam(), loss=MeanSquaredError())
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        
        if np.random.rand() <= self.epsilon:
            action = np.random.choice(self.action_size, len(state))
        else:
            action = np.argmax(self.model.predict(state, verbose=0), axis=1)
        
        return action

    def replay(self, batch_size):
        minibatch_indices = np.random.choice(len(self.memory), batch_size, replace=False)
        minibatch = np.array(self.memory, dtype=object)[minibatch_indices]
        
        for state, action, reward, next_state, done in minibatch:
            target = reward
            
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose = 0), axis=1)
            
            target_f = self.model.predict(state, verbose = 0)
            
            for i in range(len(action)):
                target_f[i][action[i]] = target[i]
            # target_f[action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [65]:
# Initialize the DQN agent
agent = DQNAgent(state_size, action_size)

# Training loop
batch_size = 16
num_episodes = 500
for episode in range(num_episodes):
    print(f"Episode {episode} of {num_episodes}")
    obs, _ = env.reset()
    
    state = env.process_observation(obs)
    state = np.reshape(state, [env.n_agents, state_size])
    
    for t in tqdm(range(200)):
        # Choose an action
        action = agent.act(state)
        
        action_env = discretize_actions(action)
        # Perform the action
        next_obs, reward, done, _, _ = env.step(action_env)

        next_state = env.process_observation(next_obs)
        next_state = np.reshape(next_state, [env.n_agents, state_size])

        # Remember the experience
        agent.remember(state, action, reward, next_state, done)
        # Update the state
        state = next_state

        # Check if episode is finished
        if done:
            break

        # Train the agent
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

Episode 0 of 500


  8%|▊         | 16/200 [00:00<00:05, 36.74it/s]


ValueError: in user code:

    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/engine/training.py", line 2137, in predict_function  *
        return step_function(self, iterator)
    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/engine/training.py", line 2123, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/engine/training.py", line 2111, in run_step  **
        outputs = model.predict_step(data)
    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/engine/training.py", line 2079, in predict_step
        return self(x, training=False)
    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/keras/engine/input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_14" is incompatible with the layer: expected shape=(None, 44), found shape=(None, 3, 44)


In [None]:
# Save model
agent.model.save('dqn_model.h5')

Too slow