In [2]:
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim

# Environment for Cloud-Edge Task Offloading
class CloudEdgeEnvironment:
    def __init__(self, num_tasks, num_local_machines, num_edge_servers):
        self.num_tasks = num_tasks
        self.num_local_machines = num_local_machines
        self.num_edge_servers = num_edge_servers
        self.task_sizes = np.random.uniform(1, 10, size=num_tasks)  # Task sizes (δk)
        self.local_powers = np.random.uniform(1, 5, size=num_local_machines)  # Local machine powers (ri)
        self.edge_powers = np.random.uniform(5, 10, size=num_edge_servers)  # Edge server powers (rj)
        self.local_energy_rates = np.random.uniform(0.1, 0.5, size=num_local_machines)  # εik
        self.edge_energy_rates = np.random.uniform(0.2, 0.6, size=num_edge_servers)  # εjk
        self.offloading_costs = np.random.uniform(0.1, 0.3, size=num_edge_servers)  # γjk
        self.reset()  # Initialize allocations

    def reset(self):
        self.allocations = np.zeros((self.num_tasks, self.num_local_machines + self.num_edge_servers))
        return self._get_state()

    def _get_state(self):
        return np.concatenate([
            self.task_sizes,
            self.local_powers,
            self.edge_powers,
            self.allocations.flatten()
        ])

    def step(self, action):
        # Unpacking the action tuple
        task_idx, machine_type, machine_idx = action

        # Assign task to the corresponding machine
        if machine_type == "local":
            self.allocations[task_idx, machine_idx] = 1
        elif machine_type == "edge":
            self.allocations[task_idx, self.num_local_machines + machine_idx] = 1

        # Calculate total energy consumption
        total_energy = self._calculate_energy()
        reward = -total_energy  # Minimize energy
        done = np.all(self.allocations.sum(axis=1) == 1)  # All tasks assigned
        return self._get_state(), reward, done, {}

    def _calculate_energy(self):
        energy = 0
        for task_idx in range(self.num_tasks):
            for local_idx in range(self.num_local_machines):
                if self.allocations[task_idx, local_idx] == 1:
                    tik = self.task_sizes[task_idx] / self.local_powers[local_idx]
                    energy += tik * self.local_energy_rates[local_idx]
            for edge_idx in range(self.num_edge_servers):
                if self.allocations[task_idx, self.num_local_machines + edge_idx] == 1:
                    tjk = self.task_sizes[task_idx] / self.edge_powers[edge_idx]
                    energy += tjk * self.edge_energy_rates[edge_idx] + self.task_sizes[task_idx] * self.offloading_costs[edge_idx]
        return energy

# Q-Network for DQN
class QNetwork(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, action_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# DQN Agent
class DQNAgent:
    def __init__(self, state_dim, action_dim, num_tasks, num_local_machines, num_edge_servers, 
                 lr=1e-3, gamma=0.99, epsilon_decay=0.995, batch_size=64, buffer_capacity=10000):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.num_tasks = num_tasks
        self.num_local_machines = num_local_machines
        self.num_edge_servers = num_edge_servers
        self.total_machines = num_local_machines + num_edge_servers  # Total possible assignments

        self.q_network = QNetwork(state_dim, action_dim)
        self.target_network = QNetwork(state_dim, action_dim)
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=lr)
        self.replay_buffer = []
        self.buffer_capacity = buffer_capacity
        self.batch_size = batch_size
        self.gamma = gamma
        self.epsilon = 1.0
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = 0.01

    def act(self, state):
        if random.random() < self.epsilon:
            action = random.randint(0, self.action_dim - 1)
        else:
            state_tensor = torch.FloatTensor(state).unsqueeze(0)
            with torch.no_grad():
                action = torch.argmax(self.q_network(state_tensor)).item()

        # Convert action from an integer to (task_idx, machine_type, machine_idx)
        task_idx = action // self.total_machines
        machine_idx = action % self.total_machines
        machine_type = "local" if machine_idx < self.num_local_machines else "edge"
        machine_idx = machine_idx if machine_type == "local" else machine_idx - self.num_local_machines

        return (task_idx, machine_type, machine_idx)

    def store_transition(self, transition):
        if len(self.replay_buffer) >= self.buffer_capacity:
            self.replay_buffer.pop(0)
        self.replay_buffer.append(transition)

    def sample_batch(self):
        indices = np.random.choice(len(self.replay_buffer), self.batch_size)
        batch = [self.replay_buffer[idx] for idx in indices]
        return batch

    def learn(self):
        if len(self.replay_buffer) < self.batch_size:
            return
        batch = self.sample_batch()
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.FloatTensor(states)
        rewards = torch.FloatTensor(rewards)
        next_states = torch.FloatTensor(next_states)
        dones = torch.FloatTensor(dones)

        action_indices = [a[0] * self.total_machines + (a[2] if a[1] == "local" else self.num_local_machines + a[2]) for a in actions]
        actions = torch.LongTensor(action_indices)

        q_values = self.q_network(states).gather(1, actions.unsqueeze(1)).squeeze()
        with torch.no_grad():
            max_next_q_values = self.target_network(next_states).max(1)[0]
            target_q_values = rewards + self.gamma * max_next_q_values * (1 - dones)

        loss = nn.MSELoss()(q_values, target_q_values)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def update_target_network(self):
        self.target_network.load_state_dict(self.q_network.state_dict())

# Initialize environment and agent
num_tasks = 10
num_local_machines = 3
num_edge_servers = 2
env = CloudEdgeEnvironment(num_tasks, num_local_machines, num_edge_servers)
state_dim = env._get_state().shape[0]
action_dim = num_tasks * (num_local_machines + num_edge_servers)
agent = DQNAgent(state_dim, action_dim, num_tasks, num_local_machines, num_edge_servers)
num_episodes = 500

# Training loop
for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0
    for _ in range(num_tasks):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.store_transition((state, action, reward, next_state, done))
        state = next_state
        total_reward += reward
        agent.learn()
        if done:
            break
    agent.update_target_network()
    agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)
    print(f"Episode {episode}, Total Reward: {total_reward}")


Episode 0, Total Reward: -67.56208901235622
Episode 1, Total Reward: -52.511140539251784
Episode 2, Total Reward: -70.67182422367375
Episode 3, Total Reward: -41.524338014442925
Episode 4, Total Reward: -52.74281353476023
Episode 5, Total Reward: -48.12339073872033
Episode 6, Total Reward: -47.625246288775806
Episode 7, Total Reward: -70.31245353237527


  states = torch.FloatTensor(states)
  dones = torch.FloatTensor(dones)


Episode 8, Total Reward: -74.13191503169318
Episode 9, Total Reward: -48.015092514160386
Episode 10, Total Reward: -55.98013898056424
Episode 11, Total Reward: -50.54647079895696
Episode 12, Total Reward: -42.7210737624273
Episode 13, Total Reward: -60.39831998881027
Episode 14, Total Reward: -55.6800355163615
Episode 15, Total Reward: -48.161208677279234
Episode 16, Total Reward: -76.17439859507789
Episode 17, Total Reward: -73.50316990472865
Episode 18, Total Reward: -84.14441668520904
Episode 19, Total Reward: -46.72254984758836
Episode 20, Total Reward: -75.03104303397873
Episode 21, Total Reward: -91.10621468176014
Episode 22, Total Reward: -56.42720678824418
Episode 23, Total Reward: -28.41135521222106
Episode 24, Total Reward: -50.337395531006756
Episode 25, Total Reward: -58.98141434669377
Episode 26, Total Reward: -36.814089566390834
Episode 27, Total Reward: -38.902013459108325
Episode 28, Total Reward: -54.20878866652578
Episode 29, Total Reward: -29.887179437164406
Episode 