<a href="https://colab.research.google.com/github/realakash140/Customlinuxcomand/blob/main/Akash_RL_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import random

In [None]:
# Step 1: Define the Environment Class
class DeliveryEnvironment:
    """Toy environment in which orders are assigned to riders one at a time.

    Order and rider coordinates are drawn uniformly from [0, 100) on both axes
    when the environment is constructed; they are stored but do not influence
    the reward.

    Args:
        num_riders: Number of riders that can receive orders.
        num_orders: Number of orders to be assigned.
        rider_capacity: Maximum number of orders a single rider may hold
            (defaults to 5).
    """

    def __init__(self, num_riders, num_orders, rider_capacity=5):
        self.num_riders = num_riders
        self.num_orders = num_orders
        self.order_locations = np.random.uniform(0, 100, size=(num_orders, 2))
        self.rider_locations = np.random.uniform(0, 100, size=(num_riders, 2))
        self.rider_capacity = rider_capacity
        self.state = None  # populated by reset()

    def reset(self):
        """Start a new episode and return the fresh state dictionary.

        The returned dict is the same object that ``step`` mutates in place.
        """
        self.state = {
            "rider_locations": self.rider_locations.copy(),
            "order_status": np.zeros(self.num_orders),  # 0 = unassigned, 1 = assigned
            "rider_orders": [[] for _ in range(self.num_riders)],
        }
        return self.state

    def _episode_finished(self):
        """Return True when no further successful assignment is possible."""
        all_assigned = np.all(self.state["order_status"] == 1)
        # If every rider is full, every remaining action would be invalid.
        # Without this check an episode with more orders than total capacity
        # could never report completion.
        all_riders_full = all(
            len(orders) >= self.rider_capacity
            for orders in self.state["rider_orders"]
        )
        return bool(all_assigned or all_riders_full)

    def step(self, rider_id, order_id):
        """Try to assign ``order_id`` to ``rider_id``.

        Args:
            rider_id: Index of the rider receiving the order.
            order_id: Index of the order to assign.

        Returns:
            A ``(state, reward, done)`` tuple. ``reward`` is 10 for a
            successful assignment and -1 when the order is already assigned or
            the rider is at capacity. ``done`` is True once every order is
            assigned or every rider is full.

        Raises:
            RuntimeError: If called before ``reset``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        order_is_free = self.state["order_status"][order_id] == 0
        rider_has_room = len(self.state["rider_orders"][rider_id]) < self.rider_capacity

        if order_is_free and rider_has_room:
            self.state["rider_orders"][rider_id].append(order_id)
            self.state["order_status"][order_id] = 1  # Mark order as assigned
            return self.state, 10, self._episode_finished()  # Reward for successful assignment

        # Penalty for invalid action; still report whether the episode is over.
        return self.state, -1, self._episode_finished()


In [None]:
# Step 2: Q-Learning Implementation
class QLearningAgent:
    """Tabular Q-learning agent whose actions are ``(rider, order)`` pairs.

    The Q-table has one row per rider and one column per order and starts at
    zero.

    Args:
        num_riders: Number of riders (rows of the Q-table).
        num_orders: Number of orders (columns of the Q-table).
        learning_rate: Step size applied to each temporal-difference update.
        discount_factor: Weight given to the best next-step Q-value.
        epsilon: Probability of taking an exploratory (random) action.
    """

    def __init__(self, num_riders, num_orders, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        self.num_riders, self.num_orders = num_riders, num_orders
        self.q_table = np.zeros((num_riders, num_orders))
        self.alpha, self.gamma, self.epsilon = learning_rate, discount_factor, epsilon

    def choose_action(self, state):
        """Pick a ``(rider, order)`` pair with an epsilon-greedy rule.

        Only orders whose ``state["order_status"]`` entry is 0 are considered.
        Riders are not filtered in any way.
        """
        open_orders = np.flatnonzero(state["order_status"] == 0)

        # Explore: random rider first, then a random open order.
        if random.uniform(0, 1) < self.epsilon:
            explore_rider = random.randint(0, self.num_riders - 1)
            return explore_rider, random.choice(open_orders)

        # Exploit: highest Q-value among open orders across all riders.
        candidates = self.q_table[:, open_orders]
        if candidates.size == 0:
            return (0, 0)  # nothing to choose from; same fallback as before

        # argmax over the flattened (rider-major) table returns the first
        # maximum, i.e. lowest rider, then lowest open order, on ties.
        flat_index = int(np.argmax(candidates))
        best_rider, column = divmod(flat_index, open_orders.size)
        return best_rider, open_orders[column]

    def update_q_value(self, rider, order, reward, next_state):
        """Apply one Q-learning update to the entry for ``(rider, order)``.

        The bootstrap term is the largest Q-value over all riders and all
        orders still unassigned in ``next_state``; it is 0 when none remain.
        """
        remaining = np.flatnonzero(next_state["order_status"] == 0)
        if len(remaining) == 0:
            max_next_q = 0
        else:
            max_next_q = np.max(self.q_table[:, remaining])

        td_target = reward + self.gamma * max_next_q
        self.q_table[rider, order] += self.alpha * (td_target - self.q_table[rider, order])


In [None]:
# Step 3: Training the RL Agent
def train_agent(env, agent, num_episodes):
    """Run ``num_episodes`` training episodes and return each episode's total reward.

    Args:
        env: Object providing ``reset()`` and ``step(rider, order)``; ``step``
            must return ``(next_state, reward, done)``.
        agent: Object providing ``choose_action(state)`` and
            ``update_q_value(rider, order, reward, next_state)``.
        num_episodes: Number of episodes to run.

    Returns:
        A list with the summed reward of every episode, in order.
    """

    def run_episode():
        # Roll out one episode, updating the agent after every step.
        observation = env.reset()
        episode_return = 0
        finished = False
        while not finished:
            rider, order = agent.choose_action(observation)
            observation, reward, finished = env.step(rider, order)
            agent.update_q_value(rider, order, reward, observation)
            episode_return += reward
        return episode_return

    episode_returns = []
    for index in range(num_episodes):
        total_reward = run_episode()
        episode_returns.append(total_reward)
        # Progress line for the 1st, 11th, 21st, ... episode.
        if index % 10 == 0:
            print(f"Episode {index + 1}: Total Reward = {total_reward}")

    return episode_returns

In [None]:
# Step 4: Main Execution
if __name__ == "__main__":
    # Experiment configuration.
    num_riders, num_orders, num_episodes = 10, 50, 100

    # Environment and learner share the same rider/order dimensions.
    env = DeliveryEnvironment(num_riders=num_riders, num_orders=num_orders)
    agent = QLearningAgent(num_riders=num_riders, num_orders=num_orders)

    # Train, keeping the total reward of every episode.
    performance_metrics = train_agent(env=env, agent=agent, num_episodes=num_episodes)

    print("Training Completed")
    print("Performance Metrics:", performance_metrics)


Episode 1: Total Reward = 326
Episode 11: Total Reward = 389
Episode 21: Total Reward = 442
Episode 31: Total Reward = 425
Episode 41: Total Reward = 472
Episode 51: Total Reward = 394
Episode 61: Total Reward = 361
Episode 71: Total Reward = 467
Episode 81: Total Reward = 361
Episode 91: Total Reward = 373
Training Completed
Performance Metrics: [326, 462, 494, 449, 437, 464, 439, 374, 464, 476, 389, 366, 398, 460, 433, 459, 280, 416, 408, 440, 442, 342, 444, 352, 401, 289, 310, 450, 204, 426, 425, 352, 325, 441, 438, 331, 423, 436, 265, 403, 472, 373, 402, 427, 338, 474, 448, 212, 329, 421, 394, 351, 437, 429, 455, 405, 357, 428, 411, 430, 361, 422, 459, 472, 398, 361, 305, 282, 404, 387, 467, 454, 429, 465, 430, 466, 244, 378, 460, 354, 361, 393, 461, 404, 390, 336, 335, 448, 412, 380, 373, 400, 367, 339, 425, 423, 423, 473, 453, 413]
