# 

In [2]:
from typing import Optional
import numpy as np

import gymnasium as gym
import simpy

# Scenario

Guidelines for how the shop will behave:
* The shop is tiny: only 2 operators
* 2 machines making 2 different products
* Making 2 different products:
  * Prod 1:
    * Single step, only needs machine 1
    * Cycle time = 3
    * Sell Value = 2
  * Prod 2:
    * Multi-step: machine 1 then machine 2, in that order
    * Cycle time = 10 (M1) + 15 (M2)
    * Sell Value = 20
* Storages:
  * Every storage area has a limited inside capacity
  * Products stored outside are subject to loss at night (loss defined as 10% per unit per cycle time)
  * An operator can watch the products to prevent theft
* Raw product to be ordered
  * Lead time = 3 UOT (units of time) per order
  * Initial Stock = 10
  * Purchase price = 1

In [36]:

import gymnasium as gym


class ShopEnv(gym.Env):
    """Gymnasium environment backed by a SimPy model of a tiny shop.

    The SimPy side holds the operators, two machines, a raw-material
    container and two stores (semi-finished and ready-to-sell products).

    NOTE: the action semantics, the reward and several observation fields
    are not modelled yet. Until they are, ``step`` only advances the
    simulation clock by one time unit and the unmodelled observation
    entries are reported as zero.
    """

    def __init__(self, operatorcounts: int = 2):
        """Declare the spaces and build the initial SimPy model.

        Args:
            operatorcounts: Number of operators available in the shop.
        """
        self.operatorcounts = operatorcounts
        # TODO: the SimPy containers below have capacity 50 while the stock
        # boxes are bounded at 10 -- align the bounds once the model settles.
        self.observation_space = gym.spaces.Dict(
            {
                "stockraw2": gym.spaces.Tuple(
                    (
                        gym.spaces.Box(low=0, high=10, dtype=int),  # Stock level
                        gym.spaces.Box(low=0, high=10, dtype=int),  # Exposed to stealing
                    )
                ),
                "stockraw": gym.spaces.Box(low=0, high=10, dtype=int),  # In percent Raw products to order
                "stockint": gym.spaces.Box(low=0, high=10, dtype=int),  # Semi finished products
                "stock2sell": gym.spaces.Box(low=0, high=10, dtype=int),  # Product ready to sell
                "gains": gym.spaces.Box(low=0, high=2**63 - 2, dtype=int),  # Money made selling products
                "expenses": gym.spaces.Box(low=0, high=2**63 - 2, dtype=int),  # Money spent + losses
                "op1remainingtodo": gym.spaces.Box(low=0, high=1000),
                "op2remainingtodo": gym.spaces.Box(low=0, high=1000),
            }
        )

        # Placeholder action space. Intended design (not implemented yet):
        #   * what operator 1 does next, and for how long / how many units,
        #     once he is done with his current task
        #   * what operator 2 does next, and for how long / how many units,
        #     once he is done with his current task
        #   * how many raw products to order
        self.action_space = gym.spaces.Discrete(4)  # ?

        self._make_simpy_env()

    def _make_simpy_env(self):
        """(Re)create the SimPy environment and every resource living in it."""
        self.shopsim = simpy.Environment()
        self.operators = simpy.Resource(self.shopsim, capacity=self.operatorcounts)
        self.stockraw = simpy.Container(self.shopsim, capacity=50, init=10)
        self.stockint = simpy.FilterStore(self.shopsim, capacity=50)
        self.stocksell = simpy.FilterStore(self.shopsim, capacity=50)

        self.machine1 = simpy.Resource(self.shopsim, capacity=1)
        self.machine2 = simpy.Resource(self.shopsim, capacity=1)

    @staticmethod
    def _as_box_value(space, value):
        """Wrap a scalar in a one-element array using the Box's dtype."""
        return np.array([value], dtype=space.dtype)

    def _get_obs(self):
        """Return an observation with one entry per key of the observation space.

        Stock entries are read from the SimPy model. Entries that the model
        does not track yet (money, operator workload, outside stock) are
        zero placeholders.
        """
        # A Container exposes `.level`; a (Filter)Store only exposes `.items`.
        tracked = {
            "stockraw": self.stockraw.level,
            "stockint": len(self.stockint.items),
            "stock2sell": len(self.stocksell.items),
        }

        observation = {}
        for key, space in self.observation_space.spaces.items():
            if isinstance(space, gym.spaces.Tuple):
                observation[key] = tuple(
                    self._as_box_value(sub_space, 0) for sub_space in space.spaces
                )
            else:
                observation[key] = self._as_box_value(space, tracked.get(key, 0))
        return observation

    def _get_info(self):
        """Return the auxiliary info dict (all counters are still constant zeros)."""
        return {
            "prod1made": 0,
            "prod2made": 0,
            "prodLoss": 0,
            "prodsold": 0,
            "prodPurchased": 0,
            "Op1Status": 0,
            "Op2Status": 0,
        }

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Start a new episode with a freshly built SimPy model.

        Args:
            seed: Forwarded to ``gym.Env.reset``.
            options: Accepted for API compatibility; currently unused.

        Returns:
            tuple: (observation, info)
        """
        super().reset(seed=seed)

        self._make_simpy_env()

        observation = self._get_obs()
        info = self._get_info()

        return observation, info

    def step(self, action):
        """Execute one timestep within the environment.

        The action is validated but not interpreted yet: the simulation
        clock simply moves forward by one time unit.

        Args:
            action: A member of ``self.action_space``.

        Returns:
            tuple: (observation, reward, terminated, truncated, info)

        Raises:
            ValueError: If ``action`` is not contained in the action space.
        """
        if not self.action_space.contains(action):
            raise ValueError(
                f"Invalid action {action!r}; expected a member of {self.action_space}"
            )

        # TODO: translate `action` into operator tasks / raw-material orders.
        self.shopsim.run(until=self.shopsim.now + 1)

        # No terminal condition and no reward signal are defined yet.
        terminated = False
        truncated = False
        reward = 0.0

        observation = self._get_obs()
        info = self._get_info()

        return observation, reward, terminated, truncated, info

In [37]:
# Register the shop environment under its Gymnasium id; the 300-step cap
# prevents infinite episodes.
gym.register(id="TinyShop_env/Shop-v0", entry_point=ShopEnv, max_episode_steps=300)

In [38]:
# Instantiate the environment through its registered id.
env = gym.make("TinyShop_env/Shop-v0")

In [39]:
# Display the observation space declared by the environment.
env.observation_space

Dict('expenses': Box(0, 9223372036854775806, (1,), int64), 'gains': Box(0, 9223372036854775806, (1,), int64), 'op1remainingtodo': Box(0.0, 1000.0, (1,), float32), 'op2remainingtodo': Box(0.0, 1000.0, (1,), float32), 'stock2sell': Box(0, 10, (1,), int64), 'stockint': Box(0, 10, (1,), int64), 'stockraw': Box(0, 10, (1,), int64), 'stockraw2': Tuple(Box(0, 10, (1,), int64), Box(0, 10, (1,), int64)))

In [32]:
# Display the action space declared by the environment.
env.action_space

Discrete(4)