# In [None]:
import tensorflow as tf
import numpy as np
import random
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from collections import deque
from environment import MECEnvironment  
from ddqn_agent import DDQNAgent  


# Training hyperparameters.
EPISODES, BATCH_SIZE, TARGET_UPDATE_FREQ = 5000, 64, 10
SAVE_PATH = "ddqn_model.h5"

# Bounded buffer of transitions; once 20000 entries are held, the oldest
# entry is discarded on each append (deque maxlen semantics).
memory = deque(maxlen=20000)

# Seed the Python, NumPy and TensorFlow random generators with the same value.
# This is done before the environment and agent are constructed so that any
# randomness they use at construction time is seeded as well.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Build the environment and read the problem dimensions from it.
env = MECEnvironment()
state_size, action_size = env.state_size, env.action_size

# Agent sized to the environment's state and action dimensions.
agent = DDQNAgent(state_size, action_size)


# Main training loop: EPISODES episodes, each at most env.max_steps steps long.
for episode in range(EPISODES):
    # Begin a new episode; states are handled as arrays of shape (1, state_size).
    state = np.reshape(env.reset(), [1, state_size])
    total_reward = 0

    for step in range(env.max_steps):
        # Ask the agent for an action and apply it to the environment.
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])

        # Store the transition, then advance to the next state.
        memory.append((state, action, reward, next_state, done))
        total_reward += reward
        state = next_state

        # Once the buffer holds more than BATCH_SIZE transitions, hand a
        # random sample of BATCH_SIZE of them to agent.replay on every step.
        if len(memory) > BATCH_SIZE:
            agent.replay(random.sample(memory, BATCH_SIZE))

        # The environment signalled the end of the episode.
        if done:
            break

    # Call agent.update_target_network() on episode 0 and then on every
    # TARGET_UPDATE_FREQ-th episode after it.
    if not episode % TARGET_UPDATE_FREQ:
        agent.update_target_network()

    print(f"Episode {episode+1}/{EPISODES} - Reward: {total_reward:.2f}, Epsilon: {agent.epsilon:.4f}")

# Training finished: write the agent's q_network to SAVE_PATH.
agent.q_network.save(SAVE_PATH)
print(f"DDQN model weights saved to {SAVE_PATH}")
