# In [None]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces

class InventoryManagementEnv(gym.Env):
    """
    Finite-horizon inventory management environment for N perishable goods.

    Each good is tracked as a single batch with one inventory level and one
    age. On every step the environment, in this order:

    1. draws Poisson demand for each good and sells what is in stock
       (1 unit of reward per unit sold, ``stockout_penalty`` per unmet unit);
    2. ages every batch by one step and spoils whole batches, either because
       the age exceeded ``horizon`` or by a random draw against the good's
       spoilage rate (``spoilage_penalty`` per spoiled unit);
    3. adds the ordered quantities, capped at ``max_inventory`` per good, and
       resets the age of every good that received a positive order;
    4. charges ``holding_cost`` per unit left in stock.

    The episode ends after ``horizon`` steps.

    Randomness comes from NumPy's global ``np.random`` state, so seed it with
    ``np.random.seed`` for reproducible runs.

    NOTE(review): the current Gymnasium ``Env`` API has ``reset()`` return
    ``(obs, info)`` and ``step()`` return a 5-tuple. This class keeps the
    legacy shapes (``reset() -> obs``, ``step() -> (obs, reward, done, info)``)
    so existing callers continue to work; migrate both together if needed.
    """

    def __init__(self, N, horizon=7, max_inventory=100, spoilage_rates=None, demand_mean=20, holding_cost=1, spoilage_penalty=5, stockout_penalty=10):
        """
        Build the environment.

        Args:
            N: Number of different goods.
            horizon: Number of steps per episode; also the maximum batch age
                before a batch is forced to spoil.
            max_inventory: Maximum inventory (and maximum order) per good.
            spoilage_rates: Per-step spoilage probability for each good, as a
                scalar or a length-N sequence/array. ``None`` draws rates
                uniformly from [0.1, 0.3).
            demand_mean: Mean of the Poisson demand per good per step.
            holding_cost: Cost per unit held in inventory at the end of a step.
            spoilage_penalty: Penalty per spoiled unit.
            stockout_penalty: Penalty per unit of unmet demand.

        Raises:
            ValueError: If ``spoilage_rates`` cannot be broadcast to shape (N,).
        """
        super().__init__()

        self.N = N  # Number of different goods
        self.horizon = horizon  # Finite time horizon
        self.max_inventory = max_inventory  # Max inventory per good
        self.demand_mean = demand_mean  # Mean demand per good per step
        self.holding_cost = holding_cost  # Cost per item in inventory
        self.spoilage_penalty = spoilage_penalty  # Penalty per spoiled item
        self.stockout_penalty = stockout_penalty  # Penalty per unmet demand

        # Spoilage rates for each good. Test against None explicitly: a
        # truthiness test is ambiguous for NumPy arrays and would also discard
        # an explicit rate of 0.
        if spoilage_rates is None:
            self.spoilage_rates = np.random.uniform(0.1, 0.3, N)
        else:
            self.spoilage_rates = np.broadcast_to(
                np.asarray(spoilage_rates, dtype=float), (N,)
            ).copy()

        # Observation space: inventory level and batch age of each good
        self.observation_space = spaces.Dict({
            'inventory': spaces.Box(low=0, high=self.max_inventory, shape=(N,), dtype=np.int32),
            'ages': spaces.Box(low=0, high=self.horizon, shape=(N,), dtype=np.int32)  # Age indicates freshness
        })

        # Action space: reorder quantities for each good
        self.action_space = spaces.Box(low=0, high=self.max_inventory, shape=(N,), dtype=np.int32)

        # Initialize the episode state
        self.reset()

    def reset(self):
        """
        Start a new episode with empty stock, zero ages, and timestep 0.

        Returns:
            The initial observation dict (see ``_get_obs``).
        """
        self.inventory = np.zeros(self.N, dtype=np.int32)  # Start with no stock
        self.ages = np.zeros(self.N, dtype=np.int32)  # All batches are "fresh"
        self.timestep = 0  # Current timestep
        return self._get_obs()

    def _get_obs(self):
        """
        Return the current state as an observation dict.

        Copies are returned so that later steps cannot mutate an observation
        the caller is still holding (e.g. in a replay buffer).
        """
        return {
            'inventory': self.inventory.copy(),
            'ages': self.ages.copy()
        }

    def step(self, action):
        """
        Apply the reorder quantities and advance the environment by one step.

        Args:
            action: Reorder quantity per good (array-like broadcastable to
                shape (N,)). Values are clipped to [0, max_inventory] and
                truncated to integers.

        Returns:
            Tuple ``(obs, reward, done, info)``: the new observation dict, the
            step reward as a float, whether the horizon has been reached, and
            an empty info dict.
        """
        # Coerce the action into the bounds of the action space so that float
        # or out-of-range inputs cannot corrupt the integer inventory.
        order = np.clip(np.asarray(action), 0, self.max_inventory).astype(np.int32)

        reward = 0  # Accumulated reward for this step

        # Random demand for each good
        demand = np.random.poisson(self.demand_mean, self.N)

        # Sell from available stock; sold units must leave the inventory,
        # otherwise they would be charged spoilage and holding costs too.
        fulfilled_demand = np.minimum(demand, self.inventory)
        unmet_demand = demand - fulfilled_demand
        self.inventory = self.inventory - fulfilled_demand
        reward += np.sum(fulfilled_demand)  # Revenue from sales
        reward -= np.sum(unmet_demand) * self.stockout_penalty

        # Age every batch and decide which batches spoil: either the age
        # limit was exceeded or the random draw fell below the spoilage rate.
        # Spoilage is all-or-nothing for a good's whole batch.
        self.ages = self.ages + 1
        spoiled_mask = (self.ages > self.horizon) | (np.random.rand(self.N) < self.spoilage_rates)
        spoiled_count = np.where(spoiled_mask, self.inventory, 0)
        reward -= np.sum(spoiled_count) * self.spoilage_penalty
        self.inventory = self.inventory - spoiled_count

        # Restock, then enforce the capacity limit so the inventory stays
        # inside the observation space; orders beyond capacity are discarded.
        self.inventory = np.clip(self.inventory + order, 0, self.max_inventory).astype(np.int32)
        # A positive order resets the batch age for that good.
        self.ages = np.where(order > 0, 0, self.ages).astype(np.int32)

        # Holding cost on the stock carried out of this step
        reward -= np.sum(self.inventory) * self.holding_cost

        # Advance time and check the finite horizon
        self.timestep += 1
        done = self.timestep >= self.horizon

        return self._get_obs(), float(reward), done, {}

    def render(self):
        """Print the current timestep, inventory levels, and batch ages."""
        print(f"Timestep {self.timestep}")
        print("Inventory:", self.inventory)
        print("Ages:", self.ages)

# Example usage:
# env = InventoryManagementEnv(N=5, horizon=7)
# obs = env.reset()
# done = False
# while not done:
#     action = env.action_space.sample()  # Randomly sample action for demonstration
#     obs, reward, done, _ = env.step(action)
#     env.render()
#     print("Reward:", reward)
