In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque, namedtuple

# ===== AIoT Paper Components =====
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward', 'priority'))

class Server:
    def __init__(self, server_id, processing_speed, transmission_delay):
        self.id = server_id
        self.processing_speed = processing_speed  # From AIoT paper Eq.9
        self.transmission_delay = transmission_delay
        self.queues = {'high': deque(), 'low': deque()}  # Priority queues (AIoT Eq.3-8)
        self.current_time = 0.0

    def add_task(self, task_size, arrival_time, priority):
        """AIoT-enhanced task addition with priority handling"""
        processing_time = task_size / self.processing_speed
        last_end = max([q[-1][1] for q in self.queues.values() if q]) if any(self.queues.values()) else self.current_time
        start_time = max(arrival_time + self.transmission_delay, last_end)
        end_time = start_time + processing_time
        self.queues[priority].append((task_size, end_time))
        self.current_time = max(self.current_time, end_time)
        return end_time - arrival_time

    def reset(self):
        self.queues['high'].clear()
        self.queues['low'].clear()
        self.current_time = 0.0

class AIoT_DQN(nn.Module):
    """Enhanced DQN architecture from AIoT paper"""
    def __init__(self, input_size, output_size):
        super(AIoT_DQN, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, output_size)
        )
    
    def forward(self, x):
        return self.fc(x)

class AIoT_DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=10000)
        self.batch_size = 64
        
        # AIoT parameters
        self.ω_H = 100  # High-priority weight (Table II)
        self.ω_L = 1    # Low-priority weight
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        
        # Double DQN implementation
        self.policy_net = AIoT_DQN(state_size, action_size).to(device)
        self.target_net = AIoT_DQN(state_size, action_size).to(device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=0.001)
        self.loss_fn = nn.HuberLoss()  # AIoT Eq.32

    def get_state(self, task_size, servers):
        """Enhanced state space (AIoT Eq.17 + server capabilities)"""
        state = [task_size]
        for s in servers:
            state += [
                len(s.queues['high']), 
                len(s.queues['low']),
                s.processing_speed,
                s.transmission_delay,
                np.random.rand()  # Simulated channel gain
            ]
        return np.array(state, dtype=np.float32)

    def act(self, state):
        if random.random() < self.epsilon:
            return random.randint(0, self.action_size-1)
        
        with torch.no_grad():
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
            q_values = self.policy_net(state_tensor)
            return q_values.argmax().item()

    def remember(self, state, action, next_state, reward, priority):
        self.memory.append(Transition(state, action, next_state, reward, priority))

    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        
        transitions = random.sample(self.memory, self.batch_size)
        batch = Transition(*zip(*transitions))
        
        # Priority-aware experience replay
        weights = torch.tensor([
            1.0 if t.priority == 'high' else 0.3 for t in transitions
        ], dtype=torch.float32).to(device)
        
        state_batch = torch.FloatTensor(batch.state).to(device)
        action_batch = torch.LongTensor(batch.action).unsqueeze(1).to(device)
        reward_batch = torch.FloatTensor(batch.reward).to(device)
        next_state_batch = torch.FloatTensor(batch.next_state).to(device)
        
        current_q = self.policy_net(state_batch).gather(1, action_batch)
        next_q = self.target_net(next_state_batch).max(1)[0].detach()
        expected_q = reward_batch + (self.gamma * next_q)
        
        loss = self.loss_fn(current_q.squeeze(), expected_q)
        loss = (weights * loss).mean()
        
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def update_target_network(self):
        self.target_net.load_state_dict(self.policy_net.state_dict())


# ===== Simulation Functions =====
def generate_tasks(n_tasks, priority_ratio=0.3):
    tasks = []
    for _ in range(n_tasks):
        size = np.random.randint(1, 100)
        arrival = np.random.exponential(scale=1/10)
        priority = 'high' if random.random() < priority_ratio else 'low'
        tasks.append((size, arrival, priority))
    return sorted(tasks, key=lambda x: x[1])

def run_simulation(agent, servers, tasks, is_training=True):
    total_time = 0.0
    for task_size, arrival_time, priority in tasks:
        state = agent.get_state(task_size, servers)
        action = agent.act(state)
        selected_server = servers[action]
        
        latency = selected_server.add_task(task_size, arrival_time, priority)
        reward = - (agent.ω_H * latency if priority == 'high' else agent.ω_L * latency)
        next_state = agent.get_state(task_size, servers)
        
        if is_training:
            agent.remember(state, action, next_state, reward, priority)
            agent.replay()
            if len(agent.memory) % 100 == 0:
                agent.update_target_network()
        
        total_time = max(total_time, latency)
    return total_time

# ===== Main Execution =====
def main():
    # Training Parameters
    n_episodes = 500
    n_tasks = 1600
    n_eval_trials = 10
    
    # Initialize components
    servers = [Server(i, np.random.randint(1,10), np.random.randint(2,20)) 
              for i in range(10)]
    agent = AIoT_DQNAgent(state_size=51, action_size=10)  # 1 + 10*(4+1)

    # Training Phase
    print("Starting AIoT-enhanced training...")
    for episode in range(n_episodes):
        # Reset environment
        servers = [Server(i, np.random.randint(1,10), np.random.randint(2,20)) 
                  for i in range(10)]
        tasks = generate_tasks(n_tasks)
        
        max_latency = run_simulation(agent, servers, tasks)
        
        # ==== CORRECTED SECTION ====
        # Convert deque to list for slicing
        memory_list = list(agent.memory)
        recent_transitions = memory_list[-n_tasks:] if len(memory_list) >= n_tasks else memory_list
        # ===========================
        
        total_reward = sum(t.reward for t in recent_transitions)
        avg_reward = total_reward / len(recent_transitions) if recent_transitions else 0

        if (episode+1) % 50 == 0:
            print(f"Episode {episode+1}/{n_episodes} | "
                  f"Max Latency: {max_latency:.2f} ms | "
                  f"Avg Reward: {avg_reward:.2f} | "
                  f"Epsilon: {agent.epsilon:.3f}")

    # Evaluation Phase
    print("\nStarting AIoT-enhanced evaluation...")
    eval_times = []
    for trial in range(n_eval_trials):
        servers = [Server(i, np.random.randint(1,10), np.random.randint(2,20)) 
                  for i in range(10)]
        tasks = generate_tasks(n_tasks)
        max_latency = run_simulation(agent, servers, tasks, is_training=False)
        eval_times.append(max_latency)
        print(f"Evaluation Trial {trial+1}/{n_eval_trials} | "
              f"Max Latency: {max_latency:.2f} ms")

    # Final Results
    print("\nFinal AIoT-DQN Performance:")
    print(f"Average Max Latency: {np.mean(eval_times):.2f} ± {np.std(eval_times):.2f} ms")
    print(f"Best Trial: {np.min(eval_times):.2f} ms")
    print(f"Worst Trial: {np.max(eval_times):.2f} ms")

if __name__ == "__main__":
    main()

Starting AIoT-enhanced training...


  state_batch = torch.FloatTensor(batch.state).to(device)


Episode 50/500 | Max Latency: 6199.59 ms | Avg Reward: -38057.51 | Epsilon: 0.010
Episode 100/500 | Max Latency: 8184.86 ms | Avg Reward: -39084.65 | Epsilon: 0.010
Episode 150/500 | Max Latency: 3521.50 ms | Avg Reward: -31184.97 | Epsilon: 0.010
Episode 200/500 | Max Latency: 3257.87 ms | Avg Reward: -19248.90 | Epsilon: 0.010
Episode 250/500 | Max Latency: 4779.72 ms | Avg Reward: -33061.05 | Epsilon: 0.010
Episode 300/500 | Max Latency: 2942.15 ms | Avg Reward: -28367.73 | Epsilon: 0.010
Episode 350/500 | Max Latency: 9664.99 ms | Avg Reward: -48402.94 | Epsilon: 0.010
Episode 400/500 | Max Latency: 2872.71 ms | Avg Reward: -27866.50 | Epsilon: 0.010
Episode 450/500 | Max Latency: 7667.92 ms | Avg Reward: -31305.34 | Epsilon: 0.010
Episode 500/500 | Max Latency: 7983.87 ms | Avg Reward: -64655.31 | Epsilon: 0.010

Starting AIoT-enhanced evaluation...
Evaluation Trial 1/10 | Max Latency: 10549.36 ms
Evaluation Trial 2/10 | Max Latency: 5195.06 ms
Evaluation Trial 3/10 | Max Latency: