<a href="https://colab.research.google.com/github/manonmanisekar/AR-vuforia/blob/main/jobcheduling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [36]:
!pip install numpy torch matplotlib




In [37]:
import random

class JobAgent:
    """A job made of sequential operations in a Reconfigurable Manufacturing System (RMS)."""

    def __init__(self, job_id, operations, priority):
        """
        Set up a job and its bookkeeping counters.

        Args:
            job_id (int): Unique job identifier.
            operations (list): One tuple per operation, laid out as
                (processing_time, required_config, cpu_demand, memory_demand).
            priority (int): Priority of the job (higher = more important).
        """
        # Identity and workload.
        self.job_id = job_id
        self.priority = priority
        self.operations = operations
        # Progress and accounting, all starting from a clean slate.
        self.current_operation = 0  # Index of the operation to run next
        self.start_time = None  # Not assigned by this class
        self.waiting_time = 0  # Accumulated delay
        self.failure_penalty = 0  # Accumulated cost of machine failures

    def propose(self, machine):
        """
        Build the resource request for the job's current operation.

        Args:
            machine: The machine being approached; it is not consulted here.

        Returns:
            dict | None: Keys "cpu", "memory", "processing_time" and
            "required_config", or None when the operation index is past the
            end of the operation list.
        """
        index = self.current_operation
        if index >= len(self.operations):
            return None

        duration, config, cpu, memory = self.operations[index]
        return dict(cpu=cpu, memory=memory, processing_time=duration, required_config=config)

    def update_waiting_time(self, time_increment):
        """Add ``time_increment`` to the job's accumulated waiting time."""
        self.waiting_time += time_increment

    def penalize_for_failure(self, penalty):
        """Add ``penalty`` to the job's accumulated failure cost."""
        self.failure_penalty += penalty

    def move_to_next_operation(self):
        """
        Advance to the following operation if one exists.

        Returns:
            bool: True when the index moved forward, False when the job was
            already on its last operation (the index is left untouched).
        """
        has_more = self.current_operation + 1 < len(self.operations)
        if has_more:
            self.current_operation += 1
        return has_more

    def __repr__(self):
        return f"Job {self.job_id}: Op {self.current_operation}/{len(self.operations)}, Priority={self.priority}, Waiting Time={self.waiting_time}, Failure Cost={self.failure_penalty}"


In [38]:
class MachineAgent:
    """Represents a reconfigurable machine in RMS.

    The machine tracks its free CPU/memory, its current configuration and a
    list of (start, end) time slots it has committed to.
    """

    def __init__(self, machine_id, max_cpu, max_memory, reconfiguration_time):
        """
        Initializes a reconfigurable machine.

        Args:
            machine_id (int): Unique identifier.
            max_cpu (int): Total CPU capacity.
            max_memory (int): Total memory capacity.
            reconfiguration_time (int): Time needed to reconfigure the machine for a new job type.
        """
        self.machine_id = machine_id
        self.max_cpu = max_cpu
        self.max_memory = max_memory
        self.available_cpu = max_cpu
        self.available_memory = max_memory
        self.current_config = None  # Machine configuration
        self.schedule = []  # Committed (start_time, end_time) slots
        self.current_time = 0
        self.utilization_time = 0  # Tracks active processing time
        self.reconfiguration_time = reconfiguration_time
        self.failure_rate = random.uniform(0.05, 0.2)  # Random failure probability
        self.failure_penalty = 0  # Cumulative; grows by 2 per unit of repair time

    def respond_to_proposal(self, job_proposal):
        """
        Negotiate with a job based on availability and configuration requirements.

        Args:
            job_proposal (dict | None): Request with keys "cpu", "memory",
                "processing_time" and "required_config".

        Returns:
            One of:
              * ``"REJECT"`` (a bare string) when the proposal is empty or the
                machine is too short on resources even for a counter-offer;
              * ``("FAILURE", repair_time, failure_penalty)`` when the random
                failure check trips; ``failure_penalty`` is the machine's
                cumulative total;
              * ``("ACCEPT", start_time, end_time, setup_time)`` when the
                request fits; resources and the time slot are committed;
              * ``("COUNTER", offer)`` where ``offer`` has keys "cpu",
                "memory" and "processing_time".
        """
        if not job_proposal:
            return "REJECT"

        cpu_request = job_proposal["cpu"]
        memory_request = job_proposal["memory"]
        processing_time = job_proposal["processing_time"]
        required_config = job_proposal["required_config"]

        # Handle machine failure (probabilistic)
        if random.random() < self.failure_rate:
            repair_time = random.randint(5, 15)
            self.current_time += repair_time
            self.failure_penalty += repair_time * 2  # High penalty for failures
            return "FAILURE", repair_time, self.failure_penalty

        if cpu_request <= self.available_cpu and memory_request <= self.available_memory:
            # Reconfigure only once the job is actually accepted; a rejected
            # or countered proposal must leave the configuration untouched,
            # otherwise a later job would skip a setup that was never paid.
            setup_time = 0
            if self.current_config != required_config:
                setup_time = self.reconfiguration_time
                self.current_config = required_config

            start_time = max(self.current_time, self.get_next_available_time()) + setup_time
            end_time = start_time + processing_time
            self.schedule.append((start_time, end_time))
            self.available_cpu -= cpu_request
            self.available_memory -= memory_request
            self.utilization_time += processing_time
            self.current_time = end_time
            return "ACCEPT", start_time, end_time, setup_time

        # Counter-offer with reduced resources
        if self.available_cpu > cpu_request * 0.7 and self.available_memory > memory_request * 0.7:
            return "COUNTER", {
                # Never offer more than is currently free: the 0.7 threshold
                # alone would allow an 0.8 offer the machine cannot honour.
                "cpu": min(int(cpu_request * 0.8), self.available_cpu),
                "memory": min(int(memory_request * 0.8), self.available_memory),
                "processing_time": int(processing_time * 1.2),  # Longer due to fewer resources
            }

        return "REJECT"

    def get_next_available_time(self):
        """Return the end time of the last scheduled slot, or 0 if nothing is scheduled."""
        return self.schedule[-1][1] if self.schedule else 0

    def __repr__(self):
        return f"Machine {self.machine_id}: CPU={self.available_cpu}/{self.max_cpu}, Memory={self.available_memory}/{self.max_memory}, Failures={self.failure_penalty}, Utilization={self.utilization_time}"


In [39]:
import numpy as np

class JobSchedulingEnv:
    """Reinforcement Learning Environment for Job Scheduling in Reconfigurable Manufacturing Systems (RMS).

    Each step offers the current job to the machine chosen by the agent and
    then moves on to the next job; the episode ends once every job was offered.
    """

    # Job-level entries at the front of every state vector: operation index,
    # processing time, required config, CPU demand, memory demand, priority.
    _JOB_FEATURES = 6

    def __init__(self, jobs, machines, negotiation_enabled=True):
        """
        Initializes the environment.

        Args:
            jobs (list): List of JobAgent instances.
            machines (list): List of MachineAgent instances.
            negotiation_enabled (bool): Whether job-machine negotiation is enabled.
        """
        self.jobs = jobs
        self.machines = machines
        self.current_job_index = 0
        self.done = False
        self.negotiation_enabled = negotiation_enabled

    def reset(self):
        """Resets the environment, machines and jobs, and returns the first state."""
        self.current_job_index = 0
        self.done = False

        for machine in self.machines:
            machine.available_cpu = machine.max_cpu
            machine.available_memory = machine.max_memory
            machine.schedule = []
            machine.utilization_time = 0
            machine.failure_penalty = 0
            # Clock and configuration must restart as well; otherwise start
            # times keep growing from episode to episode and the first job
            # may skip its setup.
            machine.current_time = 0
            machine.current_config = None

        for job in self.jobs:
            job.current_operation = 0
            job.start_time = None
            job.waiting_time = 0
            job.failure_penalty = 0

        return self.get_state()

    def get_state(self):
        """
        Returns the current state vector.

        Layout: six job features (operation index, processing time, required
        config, CPU demand, memory demand, priority) followed by
        (available_cpu, available_memory) for every machine. When no job
        remains, a zero vector of the same length is returned.
        """
        if self.current_job_index >= len(self.jobs):
            # Same length as a live state so the terminal observation matches
            # anything sized from the initial state.
            return np.zeros(self._JOB_FEATURES + len(self.machines) * 2)

        job = self.jobs[self.current_job_index]

        # Clamp an out-of-range operation index onto the last operation.
        if job.current_operation >= len(job.operations):
            job.current_operation = len(job.operations) - 1

        operation = job.operations[job.current_operation]
        job_features = np.array([
            job.current_operation,
            operation[0],  # Processing time
            operation[1],  # Required config
            operation[2],  # CPU demand
            operation[3],  # Memory demand
            job.priority,
        ])
        machine_status = np.array(
            [[m.available_cpu, m.available_memory] for m in self.machines]
        ).flatten()

        return np.concatenate((job_features, machine_status))

    def step(self, action):
        """
        Offers the current job to machine ``action`` and advances to the next job.

        Args:
            action (int): Index into ``self.machines``.

        Returns:
            tuple: ``(next_state, reward, done)``. Rewards: -20 on machine
            failure, ``10 - setup_time`` on acceptance, 5 on a counter-offer,
            -10 on rejection. If the episode is already over the result is
            ``(None, 0, True)``.
        """
        if self.current_job_index >= len(self.jobs):
            self.done = True
            return None, 0, self.done

        job = self.jobs[self.current_job_index]
        machine = self.machines[action]

        proposal = job.propose(machine)
        if self.negotiation_enabled:
            response = machine.respond_to_proposal(proposal)
        elif proposal and proposal["cpu"] <= machine.available_cpu and proposal["memory"] <= machine.available_memory:
            # Without negotiation nothing is booked on the machine; the job is
            # simply marked as accepted at time 0 with no setup.
            response = ("ACCEPT", 0, 0, 0)
        else:
            response = ("REJECT",)

        # The status arrives either as a bare string or as the first item of a tuple.
        status = response if isinstance(response, str) else response[0]

        if status == "FAILURE":
            repair_time, failure_penalty = response[1], response[2]
            job.penalize_for_failure(failure_penalty)
            job.update_waiting_time(repair_time)
            reward = -20  # Large penalty for machine failure
        elif status == "ACCEPT":
            _, start_time, _end_time, setup_time = response
            job.start_time = start_time
            reward = 10 - setup_time  # Reward penalized by setup time
        elif status == "COUNTER":
            reward = 5  # Partial reward for counter-offer
        else:
            reward = -10  # Rejection penalty

        self.current_job_index += 1
        if self.current_job_index >= len(self.jobs):
            self.done = True

        return self.get_state(), reward, self.done

    def render(self):
        """Displays job-machine assignments for debugging."""
        for machine in self.machines:
            print(f"Machine {machine.machine_id} -> Jobs: {machine.schedule}")


In [40]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from collections import deque
import random

class DQNScheduler:
    """Deep Q-Network (DQN) Scheduler for RMS Job Scheduling."""

    def __init__(self, env, gamma=0.99, lr=0.001):
        """
        Args:
            env: Environment exposing ``get_state()``, ``reset()``, ``step(action)``,
                ``jobs`` and ``machines``.
            gamma (float): Discount factor applied to future Q-values.
            lr (float): Learning rate for the Adam optimizer.
        """
        self.env = env
        self.gamma = gamma
        self.learning_rate = lr
        self.model = self.build_model()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.loss_fn = nn.MSELoss()
        self.memory = deque(maxlen=1000)  # Replay buffer of (s, a, r, s', done)
        self.rewards_per_episode = []
        self.job_success_rates = []
        self.avg_wait_times = []
        self.machine_utilization = []

    def build_model(self):
        """Creates the Q-network: state vector in, one Q-value per machine out."""
        state_size = len(self.env.get_state())
        return nn.Sequential(
            nn.Linear(state_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, len(self.env.machines))
        )

    def select_action(self, state):
        """Returns the index of the machine with the highest predicted Q-value."""
        state_tensor = torch.tensor(state, dtype=torch.float32)
        with torch.no_grad():
            q_values = self.model(state_tensor)
        return torch.argmax(q_values).item()

    def train(self, episodes=500, batch_size=32, epsilon=0.1):
        """
        Trains the DQN model with epsilon-greedy rollouts and experience replay.

        Args:
            episodes (int): Number of episodes to run.
            batch_size (int): Transitions sampled per gradient step; no update
                happens until the replay buffer holds at least this many.
            epsilon (float): Probability of picking a random machine instead
                of the greedy one.
        """
        for _ in range(episodes):
            state = self.env.reset()
            done = False
            total_reward = 0

            while not done:
                if random.random() < epsilon:
                    action = random.randrange(len(self.env.machines))
                else:
                    action = self.select_action(state)
                next_state, reward, done = self.env.step(action)
                self.memory.append((state, action, reward, next_state, done))
                self._replay(batch_size)
                state = next_state
                total_reward += reward

            self.rewards_per_episode.append(total_reward)

    def _replay(self, batch_size):
        """Runs one gradient step on a random minibatch from the replay buffer."""
        if batch_size <= 0 or len(self.memory) < batch_size:
            return

        batch = random.sample(list(self.memory), batch_size)
        states = torch.tensor(np.array([t[0] for t in batch]), dtype=torch.float32)
        actions = torch.tensor([t[1] for t in batch], dtype=torch.int64).unsqueeze(1)
        rewards = torch.tensor([float(t[2]) for t in batch], dtype=torch.float32)
        finished = torch.tensor([bool(t[4]) for t in batch], dtype=torch.bool)

        # Terminal transitions bootstrap from 0, so their next_state is never
        # fed through the network.
        next_values = torch.zeros(len(batch))
        if not bool(finished.all()):
            pending = torch.tensor(
                np.array([t[3] for t in batch if not t[4]]), dtype=torch.float32
            )
            with torch.no_grad():
                next_values[~finished] = self.model(pending).max(dim=1).values

        targets = rewards + self.gamma * next_values
        predicted = self.model(states).gather(1, actions).squeeze(1)

        loss = self.loss_fn(predicted, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def evaluate(self):
        """
        Records scheduling metrics for the environment's current state.

        Appends the share of jobs with a start time (percent), the mean start
        time of those jobs (NaN if none started), and the share of total CPU
        currently in use (percent).
        """
        jobs = self.env.jobs
        start_times = [job.start_time for job in jobs if job.start_time is not None]
        success_rate = (len(start_times) / len(jobs)) * 100 if jobs else 0.0
        avg_wait_time = np.mean(start_times) if start_times else float("nan")

        total_cpu = sum(m.max_cpu for m in self.env.machines)
        free_cpu = sum(m.available_cpu for m in self.env.machines)
        # Utilization is the share of CPU in use, not the share still free.
        utilization = (total_cpu - free_cpu) / total_cpu * 100 if total_cpu else 0.0

        self.job_success_rates.append(success_rate)
        self.avg_wait_times.append(avg_wait_time)
        self.machine_utilization.append(utilization)

    def visualize_comparison(self, labels):
        """Generates bar charts of the recorded metrics, one bar per label."""
        panels = (
            ("Job Success Rate", self.job_success_rates, ['blue', 'red']),
            ("Average Job Waiting Time", self.avg_wait_times, ['green', 'purple']),
            ("Machine Utilization Rate", self.machine_utilization, ['orange', 'brown']),
        )
        fig, axes = plt.subplots(1, len(panels), figsize=(18, 5))

        for ax, (title, values, colors) in zip(axes, panels):
            ax.bar(labels, values, color=colors)
            ax.set_title(title)

        plt.tight_layout()
        plt.show()


In [42]:
import numpy as np

class JobSchedulingEnv:
    """Reinforcement Learning Environment for Job Scheduling in Reconfigurable Manufacturing Systems (RMS).

    Besides the scheduling loop, the environment accumulates episode-wide
    totals for reconfiguration time, energy consumption and failure recovery.
    """

    # Job-level entries at the front of every state vector: operation index,
    # processing time, required config, CPU demand, memory demand, priority.
    _JOB_FEATURES = 6

    def __init__(self, jobs, machines, negotiation_enabled=True):
        """
        Args:
            jobs (list): Job objects to schedule, offered in list order.
            machines (list): Machine objects an action can select.
            negotiation_enabled (bool): Whether machines are asked to respond
                to proposals; otherwise a plain capacity check is used.
        """
        self.jobs = jobs
        self.machines = machines
        self.current_job_index = 0
        self.done = False
        self.negotiation_enabled = negotiation_enabled
        self.job_reallocation_count = 0
        self.total_reconfiguration_time = 0
        self.total_energy_consumption = 0
        self.failure_recovery_time = 0

    def reset(self):
        """Resets the environment, its totals, machines and jobs; returns the first state."""
        self.current_job_index = 0
        self.done = False
        self.job_reallocation_count = 0
        self.total_reconfiguration_time = 0
        self.total_energy_consumption = 0
        self.failure_recovery_time = 0

        for machine in self.machines:
            machine.available_cpu = machine.max_cpu
            machine.available_memory = machine.max_memory
            machine.schedule = []
            machine.utilization_time = 0
            machine.failure_penalty = 0
            # Clock and configuration must restart as well; otherwise start
            # times keep growing from episode to episode and the first job
            # may skip its setup.
            machine.current_time = 0
            machine.current_config = None

        for job in self.jobs:
            job.current_operation = 0
            job.start_time = None
            job.waiting_time = 0
            job.failure_penalty = 0

        return self.get_state()

    def get_state(self):
        """
        Returns the current state vector.

        Layout: six job features (operation index, processing time, required
        config, CPU demand, memory demand, priority) followed by
        (available_cpu, available_memory) for every machine. When no job
        remains, a zero vector of the same length is returned.
        """
        if self.current_job_index >= len(self.jobs):
            # Same length as a live state so the terminal observation matches
            # anything sized from the initial state.
            return np.zeros(self._JOB_FEATURES + len(self.machines) * 2)

        job = self.jobs[self.current_job_index]

        # Clamp an out-of-range operation index onto the last operation.
        if job.current_operation >= len(job.operations):
            job.current_operation = len(job.operations) - 1

        operation = job.operations[job.current_operation]
        job_features = np.array([
            job.current_operation,
            operation[0],  # Processing time
            operation[1],  # Required config
            operation[2],  # CPU demand
            operation[3],  # Memory demand
            job.priority,
        ])
        machine_status = np.array(
            [[m.available_cpu, m.available_memory] for m in self.machines]
        ).flatten()

        return np.concatenate((job_features, machine_status))

    def step(self, action):
        """
        Offers the current job to machine ``action`` and advances to the next job.

        Args:
            action (int): Index into ``self.machines``.

        Returns:
            tuple: ``(next_state, reward, done)``. Rewards: -20 on machine
            failure, ``10 - setup_time`` on acceptance, 5 on a counter-offer,
            -10 on rejection. If the episode is already over the result is
            ``(None, 0, True)``.
        """
        if self.current_job_index >= len(self.jobs):
            self.done = True
            return None, 0, self.done

        job = self.jobs[self.current_job_index]
        machine = self.machines[action]

        proposal = job.propose(machine)
        if self.negotiation_enabled:
            response = machine.respond_to_proposal(proposal)
        elif proposal and proposal["cpu"] <= machine.available_cpu and proposal["memory"] <= machine.available_memory:
            # Without negotiation nothing is booked on the machine; the job is
            # simply marked as accepted at time 0 with no setup.
            response = ("ACCEPT", 0, 0, 0)
        else:
            response = ("REJECT",)

        # The status arrives either as a bare string or as the first item of a tuple.
        status = response if isinstance(response, str) else response[0]

        if status == "FAILURE":
            repair_time, failure_penalty = response[1], response[2]
            job.penalize_for_failure(failure_penalty)
            job.update_waiting_time(repair_time)
            self.failure_recovery_time += repair_time
            reward = -20
        elif status == "ACCEPT":
            _, start_time, _end_time, setup_time = response
            job.start_time = start_time
            self.total_reconfiguration_time += setup_time
            # Energy is modelled as CPU demand plus memory demand of the operation.
            operation = job.operations[job.current_operation]
            self.total_energy_consumption += operation[2] + operation[3]
            reward = 10 - setup_time
        elif status == "COUNTER":
            reward = 5
        else:
            reward = -10

        self.current_job_index += 1
        if self.current_job_index >= len(self.jobs):
            self.done = True

        return self.get_state(), reward, self.done

    def render(self):
        """Displays job-machine assignments for debugging."""
        for machine in self.machines:
            print(f"Machine {machine.machine_id} -> Jobs: {machine.schedule}")


In [43]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from collections import deque
import random

class DQNScheduler:
    """Deep Q-Network (DQN) Scheduler for RMS Job Scheduling."""

    def __init__(self, env, gamma=0.99, lr=0.001):
        """
        Args:
            env: Environment exposing ``get_state()``, ``reset()``, ``step(action)``,
                ``jobs``, ``machines`` and the episode totals read by ``evaluate``.
            gamma (float): Discount factor applied to future Q-values.
            lr (float): Learning rate for the Adam optimizer.
        """
        self.env = env
        self.gamma = gamma
        self.learning_rate = lr
        self.model = self.build_model()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.loss_fn = nn.MSELoss()
        self.memory = deque(maxlen=1000)  # Replay buffer of (s, a, r, s', done)
        self.rewards_per_episode = []
        self.job_success_rates = []
        self.avg_wait_times = []
        self.machine_utilization = []
        self.energy_efficiency = []
        self.failure_recovery_performance = []
        self.reconfiguration_savings = []

    def build_model(self):
        """Creates the Q-network: state vector in, one Q-value per machine out."""
        state_size = len(self.env.get_state())
        return nn.Sequential(
            nn.Linear(state_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, len(self.env.machines))
        )

    def select_action(self, state):
        """Returns the index of the machine with the highest predicted Q-value."""
        state_tensor = torch.tensor(state, dtype=torch.float32)
        with torch.no_grad():
            q_values = self.model(state_tensor)
        return torch.argmax(q_values).item()

    def train(self, episodes=500, batch_size=32, epsilon=0.1):
        """
        Trains the DQN model with epsilon-greedy rollouts and experience replay.

        Args:
            episodes (int): Number of episodes to run.
            batch_size (int): Transitions sampled per gradient step; no update
                happens until the replay buffer holds at least this many.
            epsilon (float): Probability of picking a random machine instead
                of the greedy one.
        """
        for _ in range(episodes):
            state = self.env.reset()
            done = False
            total_reward = 0

            while not done:
                if random.random() < epsilon:
                    action = random.randrange(len(self.env.machines))
                else:
                    action = self.select_action(state)
                next_state, reward, done = self.env.step(action)
                self.memory.append((state, action, reward, next_state, done))
                self._replay(batch_size)
                state = next_state
                total_reward += reward

            self.rewards_per_episode.append(total_reward)

    def _replay(self, batch_size):
        """Runs one gradient step on a random minibatch from the replay buffer."""
        if batch_size <= 0 or len(self.memory) < batch_size:
            return

        batch = random.sample(list(self.memory), batch_size)
        states = torch.tensor(np.array([t[0] for t in batch]), dtype=torch.float32)
        actions = torch.tensor([t[1] for t in batch], dtype=torch.int64).unsqueeze(1)
        rewards = torch.tensor([float(t[2]) for t in batch], dtype=torch.float32)
        finished = torch.tensor([bool(t[4]) for t in batch], dtype=torch.bool)

        # Terminal transitions bootstrap from 0, so their next_state is never
        # fed through the network.
        next_values = torch.zeros(len(batch))
        if not bool(finished.all()):
            pending = torch.tensor(
                np.array([t[3] for t in batch if not t[4]]), dtype=torch.float32
            )
            with torch.no_grad():
                next_values[~finished] = self.model(pending).max(dim=1).values

        targets = rewards + self.gamma * next_values
        predicted = self.model(states).gather(1, actions).squeeze(1)

        loss = self.loss_fn(predicted, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def evaluate(self):
        """
        Records scheduling metrics for the environment's current state.

        Appends the share of jobs with a start time (percent), the mean start
        time of those jobs (NaN if none started), the share of total CPU in
        use (percent), energy consumed per job, total failure recovery time
        and total reconfiguration time.
        """
        jobs = self.env.jobs
        start_times = [job.start_time for job in jobs if job.start_time is not None]
        success_rate = (len(start_times) / len(jobs)) * 100 if jobs else 0.0
        avg_wait_time = np.mean(start_times) if start_times else float("nan")

        total_cpu = sum(m.max_cpu for m in self.env.machines)
        free_cpu = sum(m.available_cpu for m in self.env.machines)
        # Utilization is the share of CPU in use, not the share still free.
        utilization = (total_cpu - free_cpu) / total_cpu * 100 if total_cpu else 0.0

        energy_per_job = self.env.total_energy_consumption / len(jobs) if jobs else 0.0

        self.job_success_rates.append(success_rate)
        self.avg_wait_times.append(avg_wait_time)
        self.machine_utilization.append(utilization)
        self.energy_efficiency.append(energy_per_job)
        self.failure_recovery_performance.append(self.env.failure_recovery_time)
        self.reconfiguration_savings.append(self.env.total_reconfiguration_time)

    def visualize_comparison(self, labels):
        """Generates bar charts of energy, failure recovery and reconfiguration metrics."""
        panels = (
            ("Energy Efficiency", self.energy_efficiency, ['blue', 'red']),
            ("Failure Recovery Time", self.failure_recovery_performance, ['green', 'purple']),
            ("Reconfiguration Savings", self.reconfiguration_savings, ['orange', 'brown']),
        )
        # One axis per panel, so no empty subplot is left in the figure.
        fig, axes = plt.subplots(1, len(panels), figsize=(18, 5))

        for ax, (title, values, colors) in zip(axes, panels):
            ax.bar(labels, values, color=colors)
            ax.set_title(title)

        plt.tight_layout()
        plt.show()


In [45]:
import numpy as np

class JobSchedulingEnv:
    """Reinforcement Learning Environment for RMS Job Scheduling, tracking machine-specific metrics."""

    # Job-level entries at the front of every state vector: operation index,
    # processing time, required config, CPU demand, memory demand, priority.
    _JOB_FEATURES = 6

    def __init__(self, jobs, machines, negotiation_enabled=True):
        """
        Args:
            jobs (list): Job objects to schedule, offered in list order.
            machines (list): Machine objects an action can select.
            negotiation_enabled (bool): Whether machines are asked to respond
                to proposals; otherwise a plain capacity check is used.
        """
        self.jobs = jobs
        self.machines = machines
        self.current_job_index = 0
        self.done = False
        self.negotiation_enabled = negotiation_enabled

        # Machine-specific tracking, keyed by machine_id
        self.machine_energy_usage = {m.machine_id: 0 for m in self.machines}
        self.machine_reconfig_time = {m.machine_id: 0 for m in self.machines}
        self.machine_failure_recovery = {m.machine_id: 0 for m in self.machines}
        self.machine_utilization = {m.machine_id: 0 for m in self.machines}

    def reset(self):
        """Resets the environment, its per-machine metrics, machines and jobs; returns the first state."""
        self.current_job_index = 0
        self.done = False

        # Reset machine-specific tracking
        self.machine_energy_usage = {m.machine_id: 0 for m in self.machines}
        self.machine_reconfig_time = {m.machine_id: 0 for m in self.machines}
        self.machine_failure_recovery = {m.machine_id: 0 for m in self.machines}
        self.machine_utilization = {m.machine_id: 0 for m in self.machines}

        for machine in self.machines:
            machine.available_cpu = machine.max_cpu
            machine.available_memory = machine.max_memory
            machine.schedule = []
            machine.utilization_time = 0
            machine.failure_penalty = 0
            # Clock and configuration must restart as well; otherwise start
            # times keep growing from episode to episode and the first job
            # may skip its setup.
            machine.current_time = 0
            machine.current_config = None

        for job in self.jobs:
            job.current_operation = 0
            job.start_time = None
            job.waiting_time = 0
            job.failure_penalty = 0

        return self.get_state()

    def get_state(self):
        """
        Returns the current state vector.

        Layout: six job features (operation index, processing time, required
        config, CPU demand, memory demand, priority) followed by
        (available_cpu, available_memory) for every machine. When no job
        remains, a zero vector of the same length is returned.
        """
        if self.current_job_index >= len(self.jobs):
            return np.zeros(self._JOB_FEATURES + len(self.machines) * 2)

        job = self.jobs[self.current_job_index]

        # Clamp an out-of-range operation index onto the last operation.
        if job.current_operation >= len(job.operations):
            job.current_operation = len(job.operations) - 1

        operation = job.operations[job.current_operation]
        job_features = np.array([
            job.current_operation,
            operation[0],  # Processing time
            operation[1],  # Required config
            operation[2],  # CPU demand
            operation[3],  # Memory demand
            job.priority,
        ])
        machine_status = np.array(
            [[m.available_cpu, m.available_memory] for m in self.machines]
        ).flatten()

        return np.concatenate((job_features, machine_status))

    def step(self, action):
        """
        Offers the current job to machine ``action`` and advances to the next job.

        Args:
            action (int): Index into ``self.machines``.

        Returns:
            tuple: ``(next_state, reward, done)``. Rewards: -20 on machine
            failure, ``10 - setup_time`` on acceptance, 5 on a counter-offer,
            -10 on rejection. If the episode is already over the result is
            ``(None, 0, True)``.
        """
        if self.current_job_index >= len(self.jobs):
            self.done = True
            return None, 0, self.done

        job = self.jobs[self.current_job_index]
        machine = self.machines[action]

        proposal = job.propose(machine)
        if self.negotiation_enabled:
            response = machine.respond_to_proposal(proposal)
        elif proposal and proposal["cpu"] <= machine.available_cpu and proposal["memory"] <= machine.available_memory:
            # Without negotiation nothing is booked on the machine; the job is
            # simply marked as accepted at time 0 with no setup.
            response = ("ACCEPT", 0, 0, 0)
        else:
            response = ("REJECT",)

        # The status arrives either as a bare string or as the first item of a tuple.
        status = response if isinstance(response, str) else response[0]
        machine_key = machine.machine_id

        if status == "FAILURE":
            repair_time, failure_penalty = response[1], response[2]
            job.penalize_for_failure(failure_penalty)
            job.update_waiting_time(repair_time)
            self.machine_failure_recovery[machine_key] += repair_time
            reward = -20
        elif status == "ACCEPT":
            _, start_time, end_time, setup_time = response
            job.start_time = start_time
            self.machine_reconfig_time[machine_key] += setup_time
            # Energy is modelled as CPU demand plus memory demand of the operation.
            operation = job.operations[job.current_operation]
            self.machine_energy_usage[machine_key] += operation[2] + operation[3]
            self.machine_utilization[machine_key] += end_time - start_time
            reward = 10 - setup_time
        elif status == "COUNTER":
            reward = 5
        else:
            reward = -10

        self.current_job_index += 1
        if self.current_job_index >= len(self.jobs):
            self.done = True

        return self.get_state(), reward, self.done


In [60]:
import matplotlib.pyplot as plt

class DQNScheduler:
    """Deep Q-Network (DQN) Scheduler for RMS Job Scheduling, comparing machine-specific metrics."""

    def __init__(self, env, gamma=0.99, lr=0.001):
        """
        Args:
            env: Environment exposing ``get_state()``, ``reset()``, ``step(action)``,
                ``machines`` and the per-machine metric dicts read by ``evaluate``.
            gamma (float): Discount factor applied to future Q-values.
            lr (float): Learning rate for the Adam optimizer.
        """
        self.env = env
        self.gamma = gamma
        self.learning_rate = lr
        self.model = self.build_model()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.loss_fn = nn.MSELoss()
        self.memory = deque(maxlen=1000)  # Replay buffer of (s, a, r, s', done)
        self.rewards_per_episode = []
        self.machine_metrics = {
            "Energy Efficiency": {},
            "Reconfiguration Time": {},
            "Failure Recovery Time": {},
            "Utilization Rate": {}
        }

    def build_model(self):
        """Creates the Q-network: state vector in, one Q-value per machine out."""
        state_size = len(self.env.get_state())
        return nn.Sequential(
            nn.Linear(state_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, len(self.env.machines))
        )

    def select_action(self, state):
        """Returns the index of the machine with the highest predicted Q-value."""
        state_tensor = torch.tensor(state, dtype=torch.float32)
        # No gradient is needed to pick an action.
        with torch.no_grad():
            q_values = self.model(state_tensor)
        return torch.argmax(q_values).item()

    def train(self, episodes=500, batch_size=32, epsilon=0.1):
        """
        Trains the DQN model with epsilon-greedy rollouts and experience replay.

        Args:
            episodes (int): Number of episodes to run.
            batch_size (int): Transitions sampled per gradient step; no update
                happens until the replay buffer holds at least this many.
            epsilon (float): Probability of picking a random machine instead
                of the greedy one.
        """
        for _ in range(episodes):
            state = self.env.reset()
            done = False
            total_reward = 0

            while not done:
                if random.random() < epsilon:
                    action = random.randrange(len(self.env.machines))
                else:
                    action = self.select_action(state)
                next_state, reward, done = self.env.step(action)
                self.memory.append((state, action, reward, next_state, done))
                self._replay(batch_size)
                state = next_state
                total_reward += reward

            self.rewards_per_episode.append(total_reward)

    def _replay(self, batch_size):
        """Runs one gradient step on a random minibatch from the replay buffer."""
        if batch_size <= 0 or len(self.memory) < batch_size:
            return

        batch = random.sample(list(self.memory), batch_size)
        states = torch.tensor(np.array([t[0] for t in batch]), dtype=torch.float32)
        actions = torch.tensor([t[1] for t in batch], dtype=torch.int64).unsqueeze(1)
        rewards = torch.tensor([float(t[2]) for t in batch], dtype=torch.float32)
        finished = torch.tensor([bool(t[4]) for t in batch], dtype=torch.bool)

        # Terminal transitions bootstrap from 0, so their next_state is never
        # fed through the network.
        next_values = torch.zeros(len(batch))
        if not bool(finished.all()):
            pending = torch.tensor(
                np.array([t[3] for t in batch if not t[4]]), dtype=torch.float32
            )
            with torch.no_grad():
                next_values[~finished] = self.model(pending).max(dim=1).values

        targets = rewards + self.gamma * next_values
        predicted = self.model(states).gather(1, actions).squeeze(1)

        loss = self.loss_fn(predicted, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def evaluate(self):
        """Store machine-specific performance metrics copied from the environment."""
        sources = {
            "Energy Efficiency": self.env.machine_energy_usage,
            "Reconfiguration Time": self.env.machine_reconfig_time,
            "Failure Recovery Time": self.env.machine_failure_recovery,
            "Utilization Rate": self.env.machine_utilization,
        }
        for machine in self.env.machines:
            for metric, per_machine in sources.items():
                self.machine_metrics[metric][machine.machine_id] = per_machine[machine.machine_id]

    def visualize_machine_performance(self):
        """Plots each stored metric as a bar chart over machine ids (2x2 grid)."""
        fig, axes = plt.subplots(2, 2, figsize=(14, 10))

        for i, (metric, data) in enumerate(self.machine_metrics.items()):
            machine_ids = list(data.keys())
            values = list(data.values())

            ax = axes[i // 2, i % 2]
            ax.bar(machine_ids, values)
            ax.set_title(metric)
            ax.set_xlabel("Machine ID")
            ax.set_ylabel(metric)

        plt.tight_layout()
        plt.show()


In [59]:
import numpy as np

class JobSchedulingEnv:
    """Reinforcement Learning Environment for RMS Job Scheduling, tracking machine-specific metrics."""

    # Job-level entries at the front of every state vector: operation index,
    # processing time, required config, CPU demand, memory demand, priority.
    _JOB_FEATURES = 6

    def __init__(self, jobs, machines, negotiation_enabled=True):
        """
        Args:
            jobs (list): Job objects to schedule, offered in list order.
            machines (list): Machine objects an action can select.
            negotiation_enabled (bool): Whether machines are asked to respond
                to proposals; otherwise a plain capacity check is used.
        """
        self.jobs = jobs
        self.machines = machines
        self.current_job_index = 0
        self.done = False
        self.negotiation_enabled = negotiation_enabled

        # Machine-specific tracking, keyed by machine_id
        self.machine_energy_usage = {m.machine_id: 0 for m in self.machines}
        self.machine_reconfig_time = {m.machine_id: 0 for m in self.machines}
        self.machine_failure_recovery = {m.machine_id: 0 for m in self.machines}
        self.machine_utilization = {m.machine_id: 0 for m in self.machines}

    def reset(self):
        """Resets the environment, its per-machine metrics, machines and jobs; returns the first state."""
        self.current_job_index = 0
        self.done = False

        # Reset machine-specific tracking
        self.machine_energy_usage = {m.machine_id: 0 for m in self.machines}
        self.machine_reconfig_time = {m.machine_id: 0 for m in self.machines}
        self.machine_failure_recovery = {m.machine_id: 0 for m in self.machines}
        self.machine_utilization = {m.machine_id: 0 for m in self.machines}

        for machine in self.machines:
            machine.available_cpu = machine.max_cpu
            machine.available_memory = machine.max_memory
            machine.schedule = []
            machine.utilization_time = 0
            machine.failure_penalty = 0
            # Clock and configuration must restart as well; otherwise start
            # times keep growing from episode to episode and the first job
            # may skip its setup.
            machine.current_time = 0
            machine.current_config = None

        for job in self.jobs:
            job.current_operation = 0
            job.start_time = None
            job.waiting_time = 0
            job.failure_penalty = 0

        return self.get_state()

    def get_state(self):
        """
        Returns the current state vector.

        Layout: six job features (operation index, processing time, required
        config, CPU demand, memory demand, priority) followed by
        (available_cpu, available_memory) for every machine. When no job
        remains, a zero vector of the same length is returned.
        """
        if self.current_job_index >= len(self.jobs):
            # Same length as a live state so the terminal observation matches
            # anything sized from the initial state.
            return np.zeros(self._JOB_FEATURES + len(self.machines) * 2)

        job = self.jobs[self.current_job_index]

        # Clamp an out-of-range operation index onto the last operation.
        if job.current_operation >= len(job.operations):
            job.current_operation = len(job.operations) - 1

        operation = job.operations[job.current_operation]
        job_features = np.array([
            job.current_operation,
            operation[0],  # Processing time
            operation[1],  # Required config
            operation[2],  # CPU demand
            operation[3],  # Memory demand
            job.priority,
        ])
        machine_status = np.array(
            [[m.available_cpu, m.available_memory] for m in self.machines]
        ).flatten()

        return np.concatenate((job_features, machine_status))

    def step(self, action):
        """
        Offers the current job to machine ``action`` and advances to the next job.

        Args:
            action (int): Index into ``self.machines``.

        Returns:
            tuple: ``(next_state, reward, done)``. Rewards: -20 on machine
            failure, ``10 - setup_time`` on acceptance, 5 on a counter-offer,
            -10 on rejection. If the episode is already over the result is
            ``(None, 0, True)``.
        """
        if self.current_job_index >= len(self.jobs):
            self.done = True
            return None, 0, self.done

        job = self.jobs[self.current_job_index]
        machine = self.machines[action]

        proposal = job.propose(machine)
        if self.negotiation_enabled:
            response = machine.respond_to_proposal(proposal)
        elif proposal and proposal["cpu"] <= machine.available_cpu and proposal["memory"] <= machine.available_memory:
            # Without negotiation nothing is booked on the machine; the job is
            # simply marked as accepted at time 0 with no setup.
            response = ("ACCEPT", 0, 0, 0)
        else:
            response = ("REJECT",)

        # The status arrives either as a bare string or as the first item of a tuple.
        status = response if isinstance(response, str) else response[0]
        machine_key = machine.machine_id

        if status == "FAILURE":
            repair_time, failure_penalty = response[1], response[2]
            job.penalize_for_failure(failure_penalty)
            job.update_waiting_time(repair_time)
            self.machine_failure_recovery[machine_key] += repair_time
            reward = -20
        elif status == "ACCEPT":
            _, start_time, end_time, setup_time = response
            job.start_time = start_time
            self.machine_reconfig_time[machine_key] += setup_time
            # Energy is modelled as CPU demand plus memory demand of the operation.
            operation = job.operations[job.current_operation]
            self.machine_energy_usage[machine_key] += operation[2] + operation[3]
            self.machine_utilization[machine_key] += end_time - start_time
            reward = 10 - setup_time
        elif status == "COUNTER":
            reward = 5
        else:
            reward = -10

        self.current_job_index += 1
        if self.current_job_index >= len(self.jobs):
            self.done = True

        return self.get_state(), reward, self.done