<a href="https://colab.research.google.com/github/kshero18/Research-Project/blob/main/Final_Research_Project_Code_Part_A.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install simpy
!pip install ace_tools



In [8]:
import numpy as np
import simpy

# Define parameters
lambda_1 = 4  # Arrival rate for primary questions (customers per minute)
lambda_2 = 2  # Arrival rate for complex questions (customers per minute)
mu_1 = 3      # Service rate for less experienced agent (customers per minute)
mu_2 = 4      # Service rate for more experienced agent (customers per minute)
M = 5         # Maximum number of calls in the system
T = 40        # Simulation time horizon in minutes
dt = 0.1

overtime_cost_rate = 1  # Overtime cost rate per minute beyond threshold (agent working overtime)


class CallCenter:
    def __init__(self, env, dt, use_longest_waiting=True):
        self.env = env
        self.dt = dt
        self.use_longest_waiting = use_longest_waiting
        self.queue_1 = simpy.Store(env)  # Queue for primary questions
        self.queue_2 = simpy.Store(env)  # Queue for complex questions
        self.agent_1 = simpy.Resource(env, capacity=1)
        self.agent_2 = simpy.Resource(env, capacity=1)
        self.total_waiting_time = 0
        self.total_agent_overtime_cost = 0  # Track only agent overtime costs
        self.customers_served = 0
        self.transition_counts = {}  # Track state transitions
        self.state_counts = {}  # Count visits to states
        self.total_rewards = 0

    def record_transition(self, current_state, next_state):
        """Records state transitions for computing probabilities."""
        self.transition_counts[(current_state, next_state)] = (
            self.transition_counts.get((current_state, next_state), 0) + 1
        )
        self.state_counts[current_state] = self.state_counts.get(current_state, 0) + 1

    def calculate_transition_probabilities(self):
        """Calculates transition probabilities from recorded transitions."""
        probabilities = {}
        for (current_state, next_state), count in self.transition_counts.items():
            total_transitions_from_state = self.state_counts[current_state]
            probabilities[(current_state, next_state)] = count / total_transitions_from_state
        return probabilities

    def handle_customer(self, customer_type, agent, service_rate, arrival_time, state):
      with (self.agent_1 if agent == "Agent 1" else self.agent_2).request() as request:
          yield request
          service_time = np.random.exponential(1 / service_rate)
          end_time = self.env.now + service_time  # Calculate when the service will end

          # Simulate the service time
          yield self.env.timeout(service_time)

          wait_time = self.env.now - arrival_time  # Calculate wait time
          self.total_waiting_time += wait_time

          # Initialize agent_overtime_cost to zero
          agent_overtime_cost = 0

          # Check for overtime due to working beyond T
          if end_time > T:
              overtime_hours = end_time - T
              agent_overtime_cost = overtime_hours * overtime_cost_rate
              self.total_agent_overtime_cost += agent_overtime_cost

          self.customers_served += 1

          next_state = (len(self.queue_1.items), len(self.queue_2.items))

          # Record the state transition
          self.record_transition(state, next_state)

          # Log details
          print(
              f"At time {self.env.now:.2f}: Customer type {customer_type} served by {agent} "
              f"in {service_time:.2f} minutes, waited {wait_time:.2f} minutes "
              f"(Overtime cost: {agent_overtime_cost:.2f})"
          )
          print(f"    Transition: State {state} -> State {next_state}")

    def assign_customer(self):
        while True:
            if self.agent_1.count == 0 or self.agent_2.count == 0:
                queue, customer_type, arrival_time = self.select_customer()

                if queue is not None:
                    state = (len(self.queue_1.items), len(self.queue_2.items))  # Current state
                    yield queue.get()

                    chosen_agent = "Agent 1" if self.agent_1.count == 0 else "Agent 2"
                    service_rate = mu_1 if chosen_agent == "Agent 1" else mu_2

                    # Start handling the customer
                    self.env.process(self.handle_customer(customer_type, chosen_agent, service_rate, arrival_time, state))

            # Update rewards based on current queue lengths
            self.total_rewards += -(len(self.queue_1.items) + len(self.queue_2.items))

            yield self.env.timeout(dt)  # Allow other processes to run

    def select_customer(self):
        """Selects the customer based on the queue and waiting strategy."""
        if self.use_longest_waiting:
            return self.select_longest_waiting_customer()
        return self.select_first_come_first_served_customer()

    def select_longest_waiting_customer(self):
        """Select the longest waiting customer from the queues."""
        if len(self.queue_1.items) > 0 and len(self.queue_2.items) > 0:
            customer_1, arrival_time_1 = self.queue_1.items[0]
            customer_2, arrival_time_2 = self.queue_2.items[0]
            if arrival_time_1 <= arrival_time_2:
                return self.queue_1, customer_1, arrival_time_1
            else:
                return self.queue_2, customer_2, arrival_time_2
        elif len(self.queue_1.items) > 0:
            customer_1, arrival_time_1 = self.queue_1.items[0]
            return self.queue_1, customer_1, arrival_time_1
        elif len(self.queue_2.items) > 0:
            customer_2, arrival_time_2 = self.queue_2.items[0]
            return self.queue_2, customer_2, arrival_time_2
        else:
            return None, None, None

    def select_first_come_first_served_customer(self):
        """Select the first customer from queue 1 or queue 2."""
        if len(self.queue_1.items) > 0:
            return self.queue_1, *self.queue_1.items[0]
        elif len(self.queue_2.items) > 0:
            return self.queue_2, *self.queue_2.items[0]
        return None, None, None

    def customer_arrival(self, customer_type, arrival_rate):
        while True:
            inter_arrival_time = np.random.exponential(1 / arrival_rate)
            yield self.env.timeout(inter_arrival_time)
            arrival_time = self.env.now

            if customer_type == 1:
                self.queue_1.put((customer_type, arrival_time))
            else:
                self.queue_2.put((customer_type, arrival_time))

            self.env.process(self.assign_customer())


def run_simulation(simulation_time, use_longest_waiting=True):
    env = simpy.Environment()
    call_center = CallCenter(env, dt, use_longest_waiting)

    env.process(call_center.customer_arrival(1, lambda_1))
    env.process(call_center.customer_arrival(2, lambda_2))

    env.run(until=simulation_time)

    print(f"\nTotal customers served: {call_center.customers_served}")
    print(f"Total waiting time: {call_center.total_waiting_time:.2f} minutes")
    print(f"Total agent overtime cost: {call_center.total_agent_overtime_cost:.2f} minutes")
    print(f"Average waiting time per customer: "
          f"{(call_center.total_waiting_time / call_center.customers_served):.2f} minutes")

    # Calculate and display transition probabilities
    probabilities = call_center.calculate_transition_probabilities()
    print("\nTransition Probabilities:")
    for (initial_state, final_state), prob in probabilities.items():
        print(f"  P({final_state} | {initial_state}) = {prob:.4f}")

    return call_center.total_waiting_time, call_center.total_agent_overtime_cost, call_center.customers_served, probabilities


# Run the simulations
simulation_time = 60

# With longest waiting customer function
waiting_time_longest, agent_overtime_longest, served_longest, probs_longest = run_simulation(simulation_time, use_longest_waiting=True)

# Without longest waiting customer function
waiting_time_fcfs, agent_overtime_fcfs, served_fcfs, probs_fcfs = run_simulation(simulation_time, use_longest_waiting=False)

# Comparison of results
print("\nComparison of Waiting Times:")
print(f"With Longest Waiting Customer Function: {waiting_time_longest:.2f} minutes")
print(f"Without Longest Waiting Customer Function: {waiting_time_fcfs:.2f} minutes")
print(f"Difference in Waiting Time: {waiting_time_fcfs - waiting_time_longest:.2f} minutes")

print("\nComparison of Agent Overtime Costs:")
print(f"With Longest Waiting Customer Function: {agent_overtime_longest:.2f}")
print(f"Without Longest Waiting Customer Function: {agent_overtime_fcfs:.2f}")
print(f"Difference in Agent Overtime Cost: {agent_overtime_fcfs - agent_overtime_longest:.2f}")

#######################################################3

import pandas as pd

# Prepare the results for the table
results_data = {
    "Metric": [
        "Total Waiting Time (minutes)",
        "Total Agent Overtime Cost",
        "Total Customers Served"
    ],
    "With Longest Waiting": [
        waiting_time_longest,
        agent_overtime_longest,
        served_longest
    ],
    "Without Longest Waiting": [
        waiting_time_fcfs,
        agent_overtime_fcfs,
        served_fcfs
    ],
    "Difference (Without - With)": [
        waiting_time_fcfs - waiting_time_longest,
        agent_overtime_fcfs - agent_overtime_longest,
        served_fcfs - served_longest
    ]
}

# Create the DataFrame
results_df = pd.DataFrame(results_data)

# Display the results as a table
print(results_df.to_string(index=False))




At time 0.09: Customer type 2 served by Agent 1 in 0.07 minutes, waited 0.07 minutes (Overtime cost: 0.00)
    Transition: State (0, 1) -> State (0, 0)
At time 0.19: Customer type 1 served by Agent 1 in 0.02 minutes, waited 0.02 minutes (Overtime cost: 0.00)
    Transition: State (1, 0) -> State (0, 0)
At time 0.60: Customer type 1 served by Agent 2 in 0.21 minutes, waited 0.21 minutes (Overtime cost: 0.00)
    Transition: State (1, 0) -> State (0, 0)
At time 0.75: Customer type 1 served by Agent 2 in 0.00 minutes, waited 0.00 minutes (Overtime cost: 0.00)
    Transition: State (1, 0) -> State (0, 0)
At time 1.24: Customer type 1 served by Agent 1 in 0.94 minutes, waited 0.94 minutes (Overtime cost: 0.00)
    Transition: State (1, 0) -> State (0, 0)
At time 1.30: Customer type 2 served by Agent 1 in 0.04 minutes, waited 0.04 minutes (Overtime cost: 0.00)
    Transition: State (0, 1) -> State (0, 0)
At time 1.48: Customer type 2 served by Agent 2 in 0.32 minutes, waited 0.32 minutes (Ov

In [15]:
import numpy as np
import simpy
import pandas as pd

# Define parameters
lambda_1 = 4  # Arrival rate for primary questions (customers per minute)
lambda_2 = 2  # Arrival rate for complex questions (customers per minute)
mu_1 = 3      # Service rate for less experienced agent (customers per minute)
mu_2 = 4      # Service rate for more experienced agent (customers per minute)
# M = 5         # Maximum number of calls in the system
T = 40        # Simulation time horizon in minutes
dt = 0.6

overtime_cost_rate = 1  # Overtime cost rate per minute beyond threshold (agent working overtime)

class CallCenter:
    def __init__(self, env, dt, use_longest_waiting=True):
        self.env = env
        self.dt = dt
        self.use_longest_waiting = use_longest_waiting
        self.queue_1 = simpy.Store(env)  # Queue for primary questions
        self.queue_2 = simpy.Store(env)  # Queue for complex questions
        self.agent_1 = simpy.Resource(env, capacity=1)
        self.agent_2 = simpy.Resource(env, capacity=1)
        self.total_waiting_time = 0
        self.total_agent_overtime_cost = 0  # Track only agent overtime costs
        self.total_rewards = 0  # Track total rewards
        self.customers_served = 0
        self.transition_counts = {}  # Track state transitions
        self.state_counts = {}  # Count visits to states

    def record_transition(self, current_state, next_state):
        """Records state transitions for computing probabilities."""
        self.transition_counts[(current_state, next_state)] = (
            self.transition_counts.get((current_state, next_state), 0) + 1
        )
        self.state_counts[current_state] = self.state_counts.get(current_state, 0) + 1

    def calculate_transition_probabilities(self):
        """Calculates transition probabilities from recorded transitions."""
        probabilities = {}
        for (current_state, next_state), count in self.transition_counts.items():
            total_transitions_from_state = self.state_counts[current_state]
            probabilities[(current_state, next_state)] = count / total_transitions_from_state
        return probabilities

    def handle_customer(self, customer_type, agent, service_rate, arrival_time, state):
        with (self.agent_1 if agent == "Agent 1" else self.agent_2).request() as request:
            yield request
            service_time = np.random.exponential(1 / service_rate)
            end_time = self.env.now + service_time  # Calculate when the service will end

            # Simulate the service time
            yield self.env.timeout(service_time)

            wait_time = self.env.now - arrival_time  # Calculate wait time
            self.total_waiting_time += wait_time

            # Initialize agent_overtime_cost to zero
            agent_overtime_cost = 0

            # Check for overtime due to working beyond T
            if end_time > T:
                overtime_hours = end_time - T
                agent_overtime_cost = overtime_hours * overtime_cost_rate
                self.total_agent_overtime_cost += agent_overtime_cost

            # Update rewards
            reward = -(wait_time + agent_overtime_cost)  # Negative rewards for wait time and overtime
            self.total_rewards += reward

            self.customers_served += 1

            next_state = (len(self.queue_1.items), len(self.queue_2.items))

            # Record the state transition
            self.record_transition(state, next_state)

            # Log details
            print(
                f"At time {self.env.now:.2f}: Customer type {customer_type} served by {agent} "
                f"in {service_time:.2f} minutes, waited {wait_time:.2f} minutes "
                f"(Overtime cost: {agent_overtime_cost:.2f}, Reward: {reward:.2f})"
            )
            print(f"    Transition: State {state} -> State {next_state}")

    def assign_customer(self):
        while True:
            if self.agent_1.count == 0 or self.agent_2.count == 0:
                queue, customer_type, arrival_time = self.select_customer()

                if queue is not None:
                    state = (len(self.queue_1.items), len(self.queue_2.items))  # Current state
                    yield queue.get()

                    chosen_agent = "Agent 1" if self.agent_1.count == 0 else "Agent 2"
                    service_rate = mu_1 if chosen_agent == "Agent 1" else mu_2

                    # Start handling the customer
                    self.env.process(self.handle_customer(customer_type, chosen_agent, service_rate, arrival_time, state))

            # Update rewards based on current queue lengths
            queue_penalty = -(len(self.queue_1.items) + len(self.queue_2.items))  # Negative reward for queue lengths
            self.total_rewards += queue_penalty

            yield self.env.timeout(dt)  # Allow other processes to run

    def select_customer(self):
        """Selects the customer based on the queue and waiting strategy."""
        if self.use_longest_waiting:
            return self.select_longest_waiting_customer()
        return self.select_first_come_first_served_customer()

    def select_longest_waiting_customer(self):
        """Select the longest waiting customer from the queues."""
        if len(self.queue_1.items) > 0 and len(self.queue_2.items) > 0:
            customer_1, arrival_time_1 = self.queue_1.items[0]
            customer_2, arrival_time_2 = self.queue_2.items[0]
            if arrival_time_1 <= arrival_time_2:
                return self.queue_1, customer_1, arrival_time_1
            else:
                return self.queue_2, customer_2, arrival_time_2
        elif len(self.queue_1.items) > 0:
            customer_1, arrival_time_1 = self.queue_1.items[0]
            return self.queue_1, customer_1, arrival_time_1
        elif len(self.queue_2.items) > 0:
            customer_2, arrival_time_2 = self.queue_2.items[0]
            return self.queue_2, customer_2, arrival_time_2
        else:
            return None, None, None

    def select_first_come_first_served_customer(self):
        """Select the first customer from queue 1 or queue 2."""
        if len(self.queue_1.items) > 0:
            return self.queue_1, *self.queue_1.items[0]
        elif len(self.queue_2.items) > 0:
            return self.queue_2, *self.queue_2.items[0]
        return None, None, None

    def customer_arrival(self, customer_type, arrival_rate):
        while True:
            inter_arrival_time = np.random.exponential(1 / arrival_rate)
            yield self.env.timeout(inter_arrival_time)
            arrival_time = self.env.now

            if customer_type == 1:
                self.queue_1.put((customer_type, arrival_time))
            else:
                self.queue_2.put((customer_type, arrival_time))

            self.env.process(self.assign_customer())


def run_simulation(simulation_time, use_longest_waiting=True):
    env = simpy.Environment()
    call_center = CallCenter(env, dt, use_longest_waiting)

    env.process(call_center.customer_arrival(1, lambda_1))
    env.process(call_center.customer_arrival(2, lambda_2))

    env.run(until=simulation_time)

    print(f"\nTotal customers served: {call_center.customers_served}")
    print(f"Total waiting time: {call_center.total_waiting_time:.2f} minutes")
    print(f"Total agent overtime cost: {call_center.total_agent_overtime_cost:.2f} minutes")
    print(f"Total rewards: {call_center.total_rewards:.2f}")
    print(f"Average waiting time per customer: "
          f"{(call_center.total_waiting_time / call_center.customers_served):.2f} minutes")

    # Calculate and display transition probabilities
    probabilities = call_center.calculate_transition_probabilities()
    print("\nTransition Probabilities:")
    for (initial_state, final_state), prob in probabilities.items():
        print(f"  P({final_state} | {initial_state}) = {prob:.4f}")

    return call_center.total_waiting_time, call_center.total_agent_overtime_cost, call_center.customers_served, call_center.total_rewards, probabilities


# Run the simulations
simulation_time = 60

# With longest waiting customer function
waiting_time_longest, agent_overtime_longest, served_longest, rewards_longest, probs_longest = run_simulation(simulation_time, use_longest_waiting=True)

# Without longest waiting customer function
waiting_time_fcfs, agent_overtime_fcfs, served_fcfs, rewards_fcfs, probs_fcfs = run_simulation(simulation_time, use_longest_waiting=False)

# Prepare the results for the table
results_data = {
    "Metric": [
        "Total Waiting Time (minutes)",
        "Total Agent Overtime Cost",
        "Total Customers Served",
        "Total Rewards"
    ],
    "With Longest Waiting": [
        waiting_time_longest,
        agent_overtime_longest,
        served_longest,
        rewards_longest
    ],
    "Without Longest Waiting": [
        waiting_time_fcfs,
        agent_overtime_fcfs,
        served_fcfs,
        rewards_fcfs
    ],
    "Difference (Without - With)": [
        waiting_time_fcfs - waiting_time_longest,
        agent_overtime_fcfs - agent_overtime_longest,
        served_fcfs - served_longest,
        rewards_fcfs - rewards_longest
    ]
}

# Create the DataFrame
results_df = pd.DataFrame(results_data)

# Display the results as a table
print(results_df.to_string(index=False))


At time 0.31: Customer type 2 served by Agent 1 in 0.14 minutes, waited 0.14 minutes (Overtime cost: 0.00, Reward: -0.14)
    Transition: State (0, 1) -> State (0, 0)
At time 0.59: Customer type 1 served by Agent 2 in 0.41 minutes, waited 0.41 minutes (Overtime cost: 0.00, Reward: -0.41)
    Transition: State (1, 0) -> State (0, 2)
At time 0.86: Customer type 1 served by Agent 1 in 0.50 minutes, waited 0.50 minutes (Overtime cost: 0.00, Reward: -0.50)
    Transition: State (1, 0) -> State (0, 1)
At time 1.10: Customer type 2 served by Agent 2 in 0.32 minutes, waited 0.65 minutes (Overtime cost: 0.00, Reward: -0.65)
    Transition: State (0, 2) -> State (0, 0)
At time 1.31: Customer type 2 served by Agent 1 in 0.36 minutes, waited 0.83 minutes (Overtime cost: 0.00, Reward: -0.83)
    Transition: State (0, 1) -> State (1, 1)
At time 1.39: Customer type 1 served by Agent 2 in 0.12 minutes, waited 0.12 minutes (Overtime cost: 0.00, Reward: -0.12)
    Transition: State (1, 0) -> State (1, 1

No new customers are accepted after 40 minutes and the simulation runs till 60 minutes giving time to the agents to serve the remaining customers in the queue. This is the main model

In [64]:
import numpy as np
import simpy
import pandas as pd

# Define parameters
lambda_1 = 4  # Arrival rate for primary questions (customers per minute)
lambda_2 = 2  # Arrival rate for complex questions (customers per minute)
mu_1 = 3      # Service rate for less experienced agent (customers per minute)
mu_2 = 4      # Service rate for more experienced agent (customers per minute)
M = 5         # Maximum number of calls in the system
T = 40        # Simulation time threshold for accepting new customers
simulation_end_time = 60  # Total simulation time
dt = 0.6

overtime_cost_rate = 1  # Overtime cost rate per minute beyond threshold (agent working overtime)

# Precomputing the value function V using DPL
V = np.full((int(T/dt) + 1, M + 1, M + 1, 2), np.inf)
V[:, 0, 0, :] = 0  # Initial condition: no waiting cost when there are no customers

for t in range(1, int(T/dt) + 1):
    for q1 in range(M + 1):
        for q2 in range(M + 1):
            if q1 + q2 > M:
                continue
            cost_rep1 = (q1 + 1) / mu_1 + q2 / mu_2 if q1 + 1 <= M else np.inf  # Expected cost if assigned to rep 1
            cost_rep2 = q1 / mu_1 + (q2 + 1) / mu_2 if q2 + 1 <= M else np.inf  # Expected cost if assigned to rep 2
            V[t, q1, q2, 0] = min(cost_rep1, cost_rep2)

class CallCenter:
    def __init__(self, env, dt, V, use_longest_waiting=True):
        self.env = env
        self.V = V
        self.dt = dt
        self.use_longest_waiting = use_longest_waiting
        self.queue_1 = simpy.Store(env)  # Queue for primary questions
        self.queue_2 = simpy.Store(env)  # Queue for complex questions
        self.agent_1 = simpy.Resource(env, capacity=1)
        self.agent_2 = simpy.Resource(env, capacity=1)
        self.total_waiting_time = 0
        self.total_agent_overtime_cost = 0  # Track only agent overtime costs
        self.total_rewards = 0  # Track total rewards
        self.customers_served = 0
        self.customers_rejected = 0  # Track customers rejected after T = 40
        self.transition_counts = {}  # Track state transitions
        self.state_counts = {}  # Count visits to states

    def record_transition(self, current_state, next_state):
        """Records state transitions for computing probabilities."""
        self.transition_counts[(current_state, next_state)] = (
            self.transition_counts.get((current_state, next_state), 0) + 1
        )
        self.state_counts[current_state] = self.state_counts.get(current_state, 0) + 1

    def calculate_transition_probabilities(self):
        """Calculates transition probabilities from recorded transitions."""
        probabilities = {}
        for (current_state, next_state), count in self.transition_counts.items():
            total_transitions_from_state = self.state_counts[current_state]
            probabilities[(current_state, next_state)] = count / total_transitions_from_state
        return probabilities

    def handle_customer(self, customer_type, agent, service_rate, arrival_time, state):
        with (self.agent_1 if agent == "Agent 1" else self.agent_2).request() as request:
            yield request
            service_time = np.random.exponential(1 / service_rate)
            end_time = self.env.now + service_time  # Calculate when the service will end

            # Simulate the service time
            yield self.env.timeout(service_time)

            wait_time = self.env.now - arrival_time  # Calculate wait time
            self.total_waiting_time += wait_time

            # Initialize agent_overtime_cost to zero
            agent_overtime_cost = 0

            # Check for overtime due to working beyond T
            if end_time > T:
                overtime_hours = end_time - T
                agent_overtime_cost = overtime_hours * overtime_cost_rate
                self.total_agent_overtime_cost += agent_overtime_cost

            # Update rewards
            reward = -(wait_time + agent_overtime_cost)  # Negative rewards for wait time and overtime
            self.total_rewards += reward

            self.customers_served += 1

            next_state = (len(self.queue_1.items), len(self.queue_2.items))

            # Record the state transition
            self.record_transition(state, next_state)

            # Log details
            print(
                f"At time {self.env.now:.2f}: Customer type {customer_type} served by {agent} "
                f"in {service_time:.2f} minutes, waited {wait_time:.2f} minutes "
                f"(Overtime cost: {agent_overtime_cost:.2f}, Reward: {reward:.2f})"
            )
            print(f"    Transition: State {state} -> State {next_state}")

    def assign_customer(self):
        while True:
            if self.agent_1.count == 0 or self.agent_2.count == 0:
                state = (len(self.queue_1.items), len(self.queue_2.items))

                # Select action based on policy
                chosen_agent = self.policy.get(state, "Agent 1")
                queue = self.queue_1 if chosen_agent == "Agent 1" else self.queue_2

                if len(queue.items) > 0:
                    customer_type, arrival_time = queue.items[0]
                    yield queue.get()

                    service_rate = mu_1 if chosen_agent == "Agent 1" else mu_2
                    self.env.process(self.handle_customer(customer_type, chosen_agent, service_rate, arrival_time, state))

            yield self.env.timeout(self.dt)

    def assign_customer(self):
        while True:
            if self.agent_1.count == 0 or self.agent_2.count == 0:
                queue, customer_type, arrival_time = self.select_customer()

                if queue is not None:
                    state = (len(self.queue_1.items), len(self.queue_2.items))  # Current state
                    yield queue.get()

                    chosen_agent = "Agent 1" if self.agent_1.count == 0 else "Agent 2"
                    service_rate = mu_1 if chosen_agent == "Agent 1" else mu_2

                    # Start handling the customer
                    self.env.process(self.handle_customer(customer_type, chosen_agent, service_rate, arrival_time, state))

            # Update rewards based on current queue lengths
            queue_penalty = -(len(self.queue_1.items) + len(self.queue_2.items))  # Negative reward for queue lengths
            self.total_rewards += queue_penalty

            yield self.env.timeout(dt)  # Allow other processes to run

    def select_customer(self):
        """Selects the customer based on the queue and waiting strategy."""
        if self.use_longest_waiting:
            return self.select_longest_waiting_customer()
        return self.select_first_come_first_served_customer()

    def select_longest_waiting_customer(self):
        """Select the longest waiting customer from the queues."""
        if len(self.queue_1.items) > 0 and len(self.queue_2.items) > 0:
            customer_1, arrival_time_1 = self.queue_1.items[0]
            customer_2, arrival_time_2 = self.queue_2.items[0]
            if arrival_time_1 <= arrival_time_2:
                return self.queue_1, customer_1, arrival_time_1
            else:
                return self.queue_2, customer_2, arrival_time_2
        elif len(self.queue_1.items) > 0:
            customer_1, arrival_time_1 = self.queue_1.items[0]
            return self.queue_1, customer_1, arrival_time_1
        elif len(self.queue_2.items) > 0:
            customer_2, arrival_time_2 = self.queue_2.items[0]
            return self.queue_2, customer_2, arrival_time_2
        else:
            return None, None, None

    def select_first_come_first_served_customer(self):
        """Select the first customer from queue 1 or queue 2."""
        if len(self.queue_1.items) > 0:
            return self.queue_1, *self.queue_1.items[0]
        elif len(self.queue_2.items) > 0:
            return self.queue_2, *self.queue_2.items[0]
        return None, None, None

    def value_iteration(env, gamma=0.9, iterations=100):
      V = {}
      policy = {}

      # Initialize value function for all states
      for q1 in range(M + 1):
          for q2 in range(M + 1):
              V[(q1, q2)] = 0  # Initial value for all states

      for i in range(iterations):
          new_V = V.copy()
          for q1 in range(M + 1):
              for q2 in range(M + 1):
                  if q1 + q2 > M:  # Invalid state
                      continue

                  # Possible actions: assign to Agent 1 or Agent 2
                  actions = ["Agent 1", "Agent 2"]
                  action_values = []

                  for action in actions:
                      # Immediate reward
                      immediate_cost = -(q1 / mu_1 if action == "Agent 1" else q2 / mu_2)

                      # Future value estimation
                      future_value = 0
                      for next_q1 in range(M + 1):
                          for next_q2 in range(M + 1):
                              if next_q1 + next_q2 > M:  # Invalid state
                                  continue
                              # Transition probability (placeholder: uniform probability)
                              P = 1 / ((M + 1) * (M + 1))  # Example; calculate based on transitions
                              future_value += P * V[(next_q1, next_q2)]

                      action_value = immediate_cost + gamma * future_value
                      action_values.append(action_value)

                  # Update value function and policy
                  new_V[(q1, q2)] = max(action_values)
                  policy[(q1, q2)] = actions[np.argmax(action_values)]

          V = new_V  # Update V for next iteration

      return V, policy

    def customer_arrival(self, customer_type, arrival_rate):
        while True:
            inter_arrival_time = np.random.exponential(1 / arrival_rate)
            yield self.env.timeout(inter_arrival_time)
            arrival_time = self.env.now

            if arrival_time > T:
                self.customers_rejected += 1
                print(f"Customer rejected at time {arrival_time:.2f}: No new arrivals after T = {T}")
                continue

            if customer_type == 1:
                self.queue_1.put((customer_type, arrival_time))
            else:
                self.queue_2.put((customer_type, arrival_time))

            self.env.process(self.assign_customer())


def run_simulation(simulation_time, use_longest_waiting=True):
    env = simpy.Environment()
    call_center = CallCenter(env, dt, V, use_longest_waiting)

    env.process(call_center.customer_arrival(1, lambda_1))
    env.process(call_center.customer_arrival(2, lambda_2))

    env.run(until=simulation_time)

    print(f"\nTotal customers served: {call_center.customers_served}")
    print(f"Total waiting time: {call_center.total_waiting_time:.2f} minutes")
    print(f"Total agent overtime cost: {call_center.total_agent_overtime_cost:.2f} minutes")
    print(f"Total rewards: {call_center.total_rewards:.2f}")
    print(f"Total customers rejected after T = {T}: {call_center.customers_rejected}")
    print(f"Average waiting time per customer: "
          f"{(call_center.total_waiting_time / call_center.customers_served):.2f} minutes")

    # Calculate and display transition probabilities
    probabilities = call_center.calculate_transition_probabilities()
    print("\nTransition Probabilities:")
    for (initial_state, final_state), prob in probabilities.items():
        print(f"  P({final_state} | {initial_state}) = {prob:.4f}")

    return call_center.total_waiting_time, call_center.total_agent_overtime_cost, call_center.customers_served, call_center.total_rewards, call_center.customers_rejected, probabilities


# Run the simulations
simulation_time = simulation_end_time

# With longest waiting customer function
waiting_time_longest, agent_overtime_longest, served_longest, rewards_longest, rejected_longest, probs_longest = run_simulation(simulation_time, use_longest_waiting=True)

# Without longest waiting customer function
waiting_time_fcfs, agent_overtime_fcfs, served_fcfs, rewards_fcfs, rejected_fcfs, probs_fcfs = run_simulation(simulation_time, use_longest_waiting=False)


# Prepare the results for the table
results_data = {
    "Metric": [
        "Total Waiting Time (minutes)",
        "Total Agent Overtime Cost",
        "Total Customers Served",
        "Total Rewards",
        "Total Customers Rejected"
    ],
    "With Longest Waiting": [
        waiting_time_longest,
        agent_overtime_longest,
        served_longest,
        rewards_longest,
        rejected_longest
    ],
    "Without Longest Waiting": [
        waiting_time_fcfs,
        agent_overtime_fcfs,
        served_fcfs,
        rewards_fcfs,
        rejected_fcfs
    ],
    "Difference (Without - With)": [
        waiting_time_fcfs - waiting_time_longest,
        agent_overtime_fcfs - agent_overtime_longest,
        served_fcfs - served_longest,
        rewards_fcfs - rewards_longest,
        rejected_fcfs - rejected_longest
    ]
}

# Create the DataFrame
results_df = pd.DataFrame(results_data)

# Display the results as a table
print(results_df.to_string(index=False))



At time 0.24: Customer type 2 served by Agent 1 in 0.02 minutes, waited 0.02 minutes (Overtime cost: 0.00, Reward: -0.02)
    Transition: State (0, 1) -> State (0, 0)
At time 0.39: Customer type 1 served by Agent 2 in 0.15 minutes, waited 0.15 minutes (Overtime cost: 0.00, Reward: -0.15)
    Transition: State (1, 0) -> State (3, 0)
At time 0.51: Customer type 1 served by Agent 1 in 0.27 minutes, waited 0.27 minutes (Overtime cost: 0.00, Reward: -0.27)
    Transition: State (1, 0) -> State (3, 0)
At time 0.59: Customer type 1 served by Agent 2 in 0.10 minutes, waited 0.29 minutes (Overtime cost: 0.00, Reward: -0.29)
    Transition: State (4, 0) -> State (3, 0)
At time 1.12: Customer type 1 served by Agent 1 in 0.44 minutes, waited 0.80 minutes (Overtime cost: 0.00, Reward: -0.80)
    Transition: State (4, 0) -> State (2, 0)
At time 1.25: Customer type 1 served by Agent 2 in 0.43 minutes, waited 0.92 minutes (Overtime cost: 0.00, Reward: -0.92)
    Transition: State (3, 0) -> State (1, 1

In [None]:
# import numpy as np
# import simpy
# import pandas as pd

# # Define parameters
# lambda_1 = 2  # Arrival rate for primary questions (customers per minute)
# lambda_2 = 2  # Arrival rate for complex questions (customers per minute)
# mu_1 = 3      # Service rate for less experienced agent (customers per minute)
# mu_2 = 4      # Service rate for more experienced agent (customers per minute)
# M = 6         # Maximum number of calls in the system
# T = 40        # Simulation time threshold for accepting new customers
# simulation_end_time = 60  # Total simulation time
# dt = 0.1

# overtime_cost_rate = 1  # Overtime cost rate per minute beyond threshold (agent working overtime)

# # Precomputing the value function V using DPL
# V = np.full((int(T/dt) + 1, M + 1, M + 1, 2), np.inf)
# V[:, 0, 0, :] = 0  # Initial condition: no waiting cost when there are no customers

# for t in range(1, int(T/dt) + 1):
#     for q1 in range(M + 1):
#         for q2 in range(M + 1):
#             if q1 + q2 > M:
#                 continue
#             cost_rep1 = (q1 + 1) / mu_1 + q2 / mu_2 if q1 + 1 <= M else np.inf  # Expected cost if assigned to rep 1
#             cost_rep2 = q1 / mu_1 + (q2 + 1) / mu_2 if q2 + 1 <= M else np.inf  # Expected cost if assigned to rep 2
#             V[t, q1, q2, 0] = min(cost_rep1, cost_rep2)

# class CallCenter:
#     def __init__(self, env, dt, V, use_longest_waiting=True):
#         self.env = env
#         self.V = V
#         self.dt = dt
#         self.use_longest_waiting = use_longest_waiting
#         self.queue_1 = simpy.Store(env)  # Queue for primary questions
#         self.queue_2 = simpy.Store(env)  # Queue for complex questions
#         self.agent_1 = simpy.Resource(env, capacity=1)
#         self.agent_2 = simpy.Resource(env, capacity=1)
#         self.total_waiting_time = 0
#         self.total_agent_overtime_cost = 0  # Track only agent overtime costs
#         self.total_rewards = 0  # Track total rewards
#         self.customers_served = 0
#         self.customers_rejected = 0  # Track customers rejected after T = 40
#         self.transition_counts = {}  # Track state transitions
#         self.state_counts = {}  # Count visits to states
#         self.value_function, self.policy = self.value_iteration()

#     def record_transition(self, current_state, next_state):
#         """Records state transitions for computing probabilities."""
#         self.transition_counts[(current_state, next_state)] = (
#             self.transition_counts.get((current_state, next_state), 0) + 1
#         )
#         self.state_counts[current_state] = self.state_counts.get(current_state, 0) + 1

#     def calculate_transition_probabilities(self):
#         """Calculates transition probabilities from recorded transitions."""
#         probabilities = {}
#         for (current_state, next_state), count in self.transition_counts.items():
#             total_transitions_from_state = self.state_counts[current_state]
#             probabilities[(current_state, next_state)] = count / total_transitions_from_state
#         return probabilities

#     def value_iteration(env, gamma=0.9, iterations=100):
#       V = {}
#       policy = {}

#       # Initialize value function for all states
#       for q1 in range(M + 1):
#           for q2 in range(M + 1):
#               V[(q1, q2)] = 0  # Initial value for all states

#       for i in range(iterations):
#           new_V = V.copy()
#           for q1 in range(M + 1):
#               for q2 in range(M + 1):
#                   if q1 + q2 > M:  # Invalid state
#                       continue

#                   # Possible actions: assign to Agent 1 or Agent 2
#                   actions = ["Agent 1", "Agent 2"]
#                   action_values = []

#                   for action in actions:
#                       # Immediate reward
#                       immediate_cost = -(q1 / mu_1 if action == "Agent 1" else q2 / mu_2)

#                       # Future value estimation
#                       future_value = 0
#                       for next_q1 in range(M + 1):
#                           for next_q2 in range(M + 1):
#                               if next_q1 + next_q2 > M:  # Invalid state
#                                   continue
#                               # Transition probability (placeholder: uniform probability)
#                               P = 1 / ((M + 1) * (M + 1))  # Example; calculate based on transitions
#                               future_value += P * V[(next_q1, next_q2)]

#                       action_value = immediate_cost + gamma * future_value
#                       action_values.append(action_value)

#                   # Update value function and policy
#                   new_V[(q1, q2)] = max(action_values)
#                   policy[(q1, q2)] = actions[np.argmax(action_values)]

#           V = new_V  # Update V for next iteration

#       return V, policy

#     def handle_customer(self, customer_type, agent, service_rate, arrival_time, state):
#         with (self.agent_1 if agent == "Agent 1" else self.agent_2).request() as request:
#             yield request
#             service_time = np.random.exponential(1 / service_rate)
#             end_time = self.env.now + service_time  # Calculate when the service will end

#             # Simulate the service time
#             yield self.env.timeout(service_time)

#             wait_time = self.env.now - arrival_time  # Calculate wait time
#             self.total_waiting_time += wait_time

#             # Initialize agent_overtime_cost to zero
#             agent_overtime_cost = 0

#             # Check for overtime due to working beyond T
#             if end_time > T:
#                 overtime_hours = end_time - T
#                 agent_overtime_cost = overtime_hours * overtime_cost_rate
#                 self.total_agent_overtime_cost += agent_overtime_cost

#             # Update rewards
#             reward = -(wait_time + agent_overtime_cost)  # Negative rewards for wait time and overtime
#             self.total_rewards += reward

#             self.customers_served += 1

#             next_state = (len(self.queue_1.items), len(self.queue_2.items))

#             # Record the state transition
#             self.record_transition(state, next_state)

#             # Log details
#             print(
#                 f"At time {self.env.now:.2f}: Customer type {customer_type} served by {agent} "
#                 f"in {service_time:.2f} minutes, waited {wait_time:.2f} minutes "
#                 f"(Overtime cost: {agent_overtime_cost:.2f}, Reward: {reward:.2f})"
#             )
#             print(f"    Transition: State {state} -> State {next_state}")

#     def assign_customer(self):
#         while True:
#             if self.agent_1.count == 0 or self.agent_2.count == 0:
#                 state = (len(self.queue_1.items), len(self.queue_2.items))

#                 # Select action based on policy
#                 chosen_agent = self.policy.get(state, "Agent 1")
#                 queue = self.queue_1 if chosen_agent == "Agent 1" else self.queue_2

#                 if len(queue.items) > 0:
#                     customer_type, arrival_time = queue.items[0]
#                     yield queue.get()

#                     service_rate = mu_1 if chosen_agent == "Agent 1" else mu_2
#                     self.env.process(self.handle_customer(customer_type, chosen_agent, service_rate, arrival_time, state))

#             queue_penalty = -(len(self.queue_1.items) + len(self.queue_2.items))  # Negative reward for queue lengths
#             self.total_rewards += queue_penalty

#             yield self.env.timeout(self.dt)

#     # def assign_customer(self):
#     #     while True:
#     #         if self.agent_1.count == 0 or self.agent_2.count == 0:
#     #             queue, customer_type, arrival_time = self.select_customer()

#     #             if queue is not None:
#     #                 state = (len(self.queue_1.items), len(self.queue_2.items))  # Current state
#     #                 yield queue.get()

#     #                 chosen_agent = "Agent 1" if self.agent_1.count == 0 else "Agent 2"
#     #                 service_rate = mu_1 if chosen_agent == "Agent 1" else mu_2

#     #                 # Start handling the customer
#     #                 self.env.process(self.handle_customer(customer_type, chosen_agent, service_rate, arrival_time, state))

#     #         # Update rewards based on current queue lengths
#     #         queue_penalty = -(len(self.queue_1.items) + len(self.queue_2.items))  # Negative reward for queue lengths
#     #         self.total_rewards += queue_penalty

#     #         yield self.env.timeout(dt)  # Allow other processes to run

#     def select_customer(self):
#         """Selects the customer based on the queue and waiting strategy."""
#         if self.use_longest_waiting:
#             return self.select_longest_waiting_customer()
#         return self.select_first_come_first_served_customer()

#     def select_longest_waiting_customer(self):
#         """Select the longest waiting customer from the queues."""
#         if len(self.queue_1.items) > 0 and len(self.queue_2.items) > 0:
#             customer_1, arrival_time_1 = self.queue_1.items[0]
#             customer_2, arrival_time_2 = self.queue_2.items[0]
#             if arrival_time_1 <= arrival_time_2:
#                 return self.queue_1, customer_1, arrival_time_1
#             else:
#                 return self.queue_2, customer_2, arrival_time_2
#         elif len(self.queue_1.items) > 0:
#             customer_1, arrival_time_1 = self.queue_1.items[0]
#             return self.queue_1, customer_1, arrival_time_1
#         elif len(self.queue_2.items) > 0:
#             customer_2, arrival_time_2 = self.queue_2.items[0]
#             return self.queue_2, customer_2, arrival_time_2
#         else:
#             return None, None, None

#     def select_first_come_first_served_customer(self):
#         """Select the first customer from queue 1 or queue 2."""
#         if len(self.queue_1.items) > 0:
#             return self.queue_1, *self.queue_1.items[0]
#         elif len(self.queue_2.items) > 0:
#             return self.queue_2, *self.queue_2.items[0]
#         return None, None, None



#     def customer_arrival(self, customer_type, arrival_rate):
#         while True:
#             inter_arrival_time = np.random.exponential(1 / arrival_rate)
#             yield self.env.timeout(inter_arrival_time)
#             arrival_time = self.env.now

#             if arrival_time > T:
#                 self.customers_rejected += 1
#                 print(f"Customer rejected at time {arrival_time:.2f}: No new arrivals after T = {T}")
#                 continue

#             if customer_type == 1:
#                 self.queue_1.put((customer_type, arrival_time))
#             else:
#                 self.queue_2.put((customer_type, arrival_time))

#             self.env.process(self.assign_customer())


# def run_simulation(simulation_time, use_longest_waiting=True):
#     env = simpy.Environment()
#     call_center = CallCenter(env, dt, V, use_longest_waiting)

#     env.process(call_center.customer_arrival(1, lambda_1))
#     env.process(call_center.customer_arrival(2, lambda_2))

#     env.run(until=simulation_time)

#     print(f"\nTotal customers served: {call_center.customers_served}")
#     print(f"Total waiting time: {call_center.total_waiting_time:.2f} minutes")
#     print(f"Total agent overtime cost: {call_center.total_agent_overtime_cost:.2f} minutes")
#     print(f"Total rewards: {call_center.total_rewards:.2f}")
#     print(f"Total customers rejected after T = {T}: {call_center.customers_rejected}")
#     print(f"Average waiting time per customer: "
#           f"{(call_center.total_waiting_time / call_center.customers_served):.2f} minutes")

#     # Calculate and display transition probabilities
#     probabilities = call_center.calculate_transition_probabilities()
#     print("\nTransition Probabilities:")
#     for (initial_state, final_state), prob in probabilities.items():
#         print(f"  P({final_state} | {initial_state}) = {prob:.4f}")

#     return call_center.total_waiting_time, call_center.total_agent_overtime_cost, call_center.customers_served, call_center.total_rewards, call_center.customers_rejected, probabilities


# # Run the simulations
# simulation_time = simulation_end_time

# # With longest waiting customer function
# waiting_time_longest, agent_overtime_longest, served_longest, rewards_longest, rejected_longest, probs_longest = run_simulation(simulation_time, use_longest_waiting=True)

# # Without longest waiting customer function
# waiting_time_fcfs, agent_overtime_fcfs, served_fcfs, rewards_fcfs, rejected_fcfs, probs_fcfs = run_simulation(simulation_time, use_longest_waiting=False)


# # Prepare the results for the table
# results_data = {
#     "Metric": [
#         "Total Waiting Time (minutes)",
#         "Total Agent Overtime Cost",
#         "Total Customers Served",
#         "Total Rewards",
#         "Total Customers Rejected"
#     ],
#     "With Longest Waiting": [
#         waiting_time_longest,
#         agent_overtime_longest,
#         served_longest,
#         rewards_longest,
#         rejected_longest
#     ],
#     "Without Longest Waiting": [
#         waiting_time_fcfs,
#         agent_overtime_fcfs,
#         served_fcfs,
#         rewards_fcfs,
#         rejected_fcfs
#     ],
#     "Difference (Without - With)": [
#         waiting_time_fcfs - waiting_time_longest,
#         agent_overtime_fcfs - agent_overtime_longest,
#         served_fcfs - served_longest,
#         rewards_fcfs - rewards_longest,
#         rejected_fcfs - rejected_longest
#     ]
# }

# # Create the DataFrame
# results_df = pd.DataFrame(results_data)

# # Display the results as a table
# print(results_df.to_string(index=False))



In [40]:
# import numpy as np
# import simpy
# import pandas as pd

# # Define parameters
# lambda_1 = 4  # Arrival rate for primary questions (customers per minute)
# lambda_2 = 2  # Arrival rate for complex questions (customers per minute)
# mu_1 = 3      # Service rate for less experienced agent (customers per minute)
# mu_2 = 4      # Service rate for more experienced agent (customers per minute)
# M = 20         # Maximum number of calls in the system
# T = 40        # Simulation time threshold for accepting new customers
# simulation_end_time = 60  # Total simulation time
# dt = 0.6

# overtime_cost_rate = 1  # Overtime cost rate per minute beyond threshold (agent working overtime)


# class CallCenter:
#     def __init__(self, env, dt, use_longest_waiting=True):
#         self.env = env
#         self.dt = dt
#         self.use_longest_waiting = use_longest_waiting
#         self.queue_1 = simpy.Store(env)  # Queue for primary questions
#         self.queue_2 = simpy.Store(env)  # Queue for complex questions
#         self.agent_1 = simpy.Resource(env, capacity=1)
#         self.agent_2 = simpy.Resource(env, capacity=1)
#         self.total_waiting_time = 0
#         self.total_agent_overtime_cost = 0
#         self.total_rewards = 0
#         self.customers_served = 0
#         self.customers_rejected = 0
#         self.transition_counts = {}
#         self.state_counts = {}
#         self.value_function = {}  # Stores the value function
#         self.policy = {}  # Stores the optimal policy for each state

#     def record_transition(self, current_state, next_state):
#         """Records state transitions for computing probabilities."""
#         self.transition_counts[(current_state, next_state)] = (
#             self.transition_counts.get((current_state, next_state), 0) + 1
#         )
#         self.state_counts[current_state] = self.state_counts.get(current_state, 0) + 1

#     def calculate_transition_probabilities(self):
#         """Calculates transition probabilities from recorded transitions."""
#         probabilities = {}
#         for (current_state, next_state), count in self.transition_counts.items():
#             total_transitions_from_state = self.state_counts[current_state]
#             probabilities[(current_state, next_state)] = count / total_transitions_from_state
#         return probabilities

#     def value_iteration(self, gamma=0.9, iterations=100):
#         """Performs value iteration to compute the value function and optimal policy."""
#         states = [(q1, q2) for q1 in range(M + 1) for q2 in range(M + 1)]
#         self.value_function = {state: 0 for state in states}
#         self.policy = {state: None for state in states}

#         for _ in range(iterations):
#             new_value_function = {}
#             for state in states:
#                 q1, q2 = state
#                 values = []

#                 # Action: Assign to Agent 1
#                 if q1 > 0:
#                     reward = -1  # Example reward: negative wait penalty
#                     next_state = (max(q1 - 1, 0), q2)
#                     value = reward + gamma * self.value_function[next_state]
#                     values.append((value, "Agent 1"))

#                 # Action: Assign to Agent 2
#                 if q2 > 0:
#                     reward = -1  # Example reward: negative wait penalty
#                     next_state = (q1, max(q2 - 1, 0))
#                     value = reward + gamma * self.value_function[next_state]
#                     values.append((value, "Agent 2"))

#                 # Take the best action
#                 if values:
#                     new_value_function[state], self.policy[state] = max(values)
#                 else:
#                     new_value_function[state] = 0  # No actions available

#             self.value_function = new_value_function

#     def handle_customer(self, customer_type, agent, service_rate, arrival_time, state):
#         with (self.agent_1 if agent == "Agent 1" else self.agent_2).request() as request:
#             yield request
#             service_time = np.random.exponential(1 / service_rate)
#             end_time = self.env.now + service_time  # Calculate when the service will end

#             # Simulate the service time
#             yield self.env.timeout(service_time)

#             wait_time = self.env.now - arrival_time  # Calculate wait time
#             self.total_waiting_time += wait_time

#             # Initialize agent_overtime_cost to zero
#             agent_overtime_cost = 0

#             # Check for overtime due to working beyond T
#             if end_time > T:
#                 overtime_hours = end_time - T
#                 agent_overtime_cost = overtime_hours * overtime_cost_rate
#                 self.total_agent_overtime_cost += agent_overtime_cost

#             # Update rewards
#             reward = -(wait_time + agent_overtime_cost)  # Negative rewards for wait time and overtime
#             self.total_rewards += reward

#             self.customers_served += 1

#             next_state = (len(self.queue_1.items), len(self.queue_2.items))

#             # Record the state transition
#             self.record_transition(state, next_state)

#             # Log details
#             print(
#                 f"At time {self.env.now:.2f}: Customer type {customer_type} served by {agent} "
#                 f"in {service_time:.2f} minutes, waited {wait_time:.2f} minutes "
#                 f"(Overtime cost: {agent_overtime_cost:.2f}, Reward: {reward:.2f})"
#             )
#             print(f"    Transition: State {state} -> State {next_state}")

#     def assign_customer(self):
#       while True:
#           if self.agent_1.count == 0 or self.agent_2.count == 0:
#               queue, customer_type, arrival_time = self.select_customer()

#               if queue is not None:
#                   state = (len(self.queue_1.items), len(self.queue_2.items))  # Current state

#                   # Validate the state
#                   if len(self.queue_1.items) + len(self.queue_2.items) > M:
#                       print(f"Invalid state encountered: {state}. Skipping.")
#                       continue

#                   yield queue.get()

#                   # Use the policy to select the agent, with a fallback for missing states
#                   if state in self.policy:
#                       chosen_agent = self.policy[state]
#                   else:
#                       print(f"Warning: State {state} not found in policy. Using default fallback.")
#                       chosen_agent = "Agent 1" if self.agent_1.count == 0 else "Agent 2"

#                   service_rate = mu_1 if chosen_agent == "Agent 1" else mu_2

#                   # Start handling the customer
#                   self.env.process(self.handle_customer(customer_type, chosen_agent, service_rate, arrival_time, state))

#           # Update rewards based on current queue lengths
#           queue_penalty = -(len(self.queue_1.items) + len(self.queue_2.items))  # Negative reward for queue lengths
#           self.total_rewards += queue_penalty

#           yield self.env.timeout(dt)  # Allow other processes to run

#     def select_customer(self):
#         """Selects the customer based on the queue and waiting strategy."""
#         if self.use_longest_waiting:
#             return self.select_longest_waiting_customer()
#         return self.select_first_come_first_served_customer()

#     def select_longest_waiting_customer(self):
#         """Select the longest waiting customer from the queues."""
#         if len(self.queue_1.items) > 0 and len(self.queue_2.items) > 0:
#             customer_1, arrival_time_1 = self.queue_1.items[0]
#             customer_2, arrival_time_2 = self.queue_2.items[0]
#             if arrival_time_1 <= arrival_time_2:
#                 return self.queue_1, customer_1, arrival_time_1
#             else:
#                 return self.queue_2, customer_2, arrival_time_2
#         elif len(self.queue_1.items) > 0:
#             customer_1, arrival_time_1 = self.queue_1.items[0]
#             return self.queue_1, customer_1, arrival_time_1
#         elif len(self.queue_2.items) > 0:
#             customer_2, arrival_time_2 = self.queue_2.items[0]
#             return self.queue_2, customer_2, arrival_time_2
#         else:
#             return None, None, None

#     def select_first_come_first_served_customer(self):
#         """Select the first customer from queue 1 or queue 2."""
#         if len(self.queue_1.items) > 0:
#             return self.queue_1, *self.queue_1.items[0]
#         elif len(self.queue_2.items) > 0:
#             return self.queue_2, *self.queue_2.items[0]
#         return None, None, None

#     def customer_arrival(self, customer_type, arrival_rate):
#         while True:
#             inter_arrival_time = np.random.exponential(1 / arrival_rate)
#             yield self.env.timeout(inter_arrival_time)
#             arrival_time = self.env.now

#             if arrival_time > T:
#                 self.customers_rejected += 1
#                 print(f"Customer rejected at time {arrival_time:.2f}: No new arrivals after T = {T}")
#                 continue

#             if customer_type == 1:
#                 self.queue_1.put((customer_type, arrival_time))
#             else:
#                 self.queue_2.put((customer_type, arrival_time))

#             self.env.process(self.assign_customer())


# def run_simulation(simulation_time, use_longest_waiting=True):
#     env = simpy.Environment()
#     call_center = CallCenter(env, dt, use_longest_waiting)

#     # Perform value iteration before running the simulation
#     call_center.value_iteration(gamma=0.9, iterations=100)

#     env.process(call_center.customer_arrival(1, lambda_1))
#     env.process(call_center.customer_arrival(2, lambda_2))

#     env.run(until=simulation_time)

#     print(f"\nTotal customers served: {call_center.customers_served}")
#     print(f"Total waiting time: {call_center.total_waiting_time:.2f} minutes")
#     print(f"Total agent overtime cost: {call_center.total_agent_overtime_cost:.2f} minutes")
#     print(f"Total rewards: {call_center.total_rewards:.2f}")
#     print(f"Total customers rejected after T = {T}: {call_center.customers_rejected}")
#     print(f"Average waiting time per customer: "
#           f"{(call_center.total_waiting_time / call_center.customers_served):.2f} minutes")

#     # Calculate and display transition probabilities
#     probabilities = call_center.calculate_transition_probabilities()
#     print("\nTransition Probabilities:")
#     for (initial_state, final_state), prob in probabilities.items():
#         print(f"  P({final_state} | {initial_state}) = {prob:.4f}")


#     print("\nValue Function:")
#     for state, value in call_center.value_function.items():
#         print(f"State {state}: Value = {value:.2f}")

#     print("\nOptimal Policy:")
#     for state, action in call_center.policy.items():
#         print(f"State {state}: Action = {action}")

#     return call_center.total_waiting_time, call_center.total_agent_overtime_cost, call_center.customers_served, call_center.total_rewards, call_center.customers_rejected, call_center.policy


# # Run the simulations
# simulation_time = simulation_end_time

# # With longest waiting customer function
# waiting_time_longest, agent_overtime_longest, served_longest, rewards_longest, rejected_longest, probs_longest = run_simulation(simulation_time, use_longest_waiting=True)

# # Without longest waiting customer function
# waiting_time_fcfs, agent_overtime_fcfs, served_fcfs, rewards_fcfs, rejected_fcfs, probs_fcfs = run_simulation(simulation_time, use_longest_waiting=False)


# # Prepare the results for the table
# results_data = {
#     "Metric": [
#         "Total Waiting Time (minutes)",
#         "Total Agent Overtime Cost",
#         "Total Customers Served",
#         "Total Rewards",
#         "Total Customers Rejected"
#     ],
#     "With Longest Waiting": [
#         waiting_time_longest,
#         agent_overtime_longest,
#         served_longest,
#         rewards_longest,
#         rejected_longest
#     ],
#     "Without Longest Waiting": [
#         waiting_time_fcfs,
#         agent_overtime_fcfs,
#         served_fcfs,
#         rewards_fcfs,
#         rejected_fcfs
#     ],
#     "Difference (Without - With)": [
#         waiting_time_fcfs - waiting_time_longest,
#         agent_overtime_fcfs - agent_overtime_longest,
#         served_fcfs - served_longest,
#         rewards_fcfs - rewards_longest,
#         rejected_fcfs - rejected_longest
#     ]
# }

# # Create the DataFrame
# results_df = pd.DataFrame(results_data)

# # Display the results as a table
# print(results_df.to_string(index=False))



At time 0.08: Customer type 2 served by Agent 2 in 0.02 minutes, waited 0.02 minutes (Overtime cost: 0.00, Reward: -0.02)
    Transition: State (0, 1) -> State (1, 0)
At time 0.38: Customer type 1 served by Agent 2 in 0.24 minutes, waited 0.24 minutes (Overtime cost: 0.00, Reward: -0.24)
    Transition: State (1, 1) -> State (3, 2)
At time 0.68: Customer type 2 served by Agent 2 in 0.08 minutes, waited 0.54 minutes (Overtime cost: 0.00, Reward: -0.54)
    Transition: State (3, 2) -> State (4, 1)
At time 0.70: Customer type 1 served by Agent 1 in 0.70 minutes, waited 0.70 minutes (Overtime cost: 0.00, Reward: -0.70)
    Transition: State (1, 0) -> State (4, 1)
At time 0.84: Customer type 1 served by Agent 2 in 0.09 minutes, waited 0.59 minutes (Overtime cost: 0.00, Reward: -0.59)
    Transition: State (4, 1) -> State (5, 1)
At time 1.04: Customer type 1 served by Agent 2 in 0.19 minutes, waited 0.74 minutes (Overtime cost: 0.00, Reward: -0.74)
    Transition: State (5, 1) -> State (6, 1