# 

In [4]:
from typing import Optional
import numpy as np

import gymnasium as gym
import simpy

# Scenario

Guidelines for how the shop will behave:
* The shop is tiny, only 2 operators
* 2 machines making 2 different products
* Making 2 different products:
  * Prod 1:
    * Single step, only needs machine 1
    * Cycle time = 3
    * Sell Value = 2
  * Prod 2:
    * Multi-step: machine 1, then machine 2, in that order
    * Cycle time = 10 (M1) + 15 (M2)
    * Sell Value = 20
* Storages:
  * All storages have an inside capacity
  * Outside storage is subject to product losses at night (loss defined as 10% per unit per cycle time)
  * An operator can watch the products to prevent theft
* Raw product has to be ordered:
  * Lead time = 3 UOT per order
  * Initial Stock = 10
  * Purchase price = 1

In [69]:

import gymnasium as gym


class ShopEnv(gym.Env):
    """Gymnasium environment wrapping a SimPy simulation of a tiny shop.

    Each call to :meth:`step` applies the agent's action (batch overrides and
    raw-product orders) and then advances the SimPy clock by a fixed number of
    simulation time units.

    Args:
        operatorcounts: Number of operators (capacity of the operator resource).
        machine_count: Number of machines; each is a SimPy resource of capacity 1.
        product_count: Number of distinct products.
        step: Simulation time units elapsed per call to :meth:`step`.
    """

    def __init__(self,
                 operatorcounts: int = 2,
                 machine_count: int = 3,
                 product_count: int = 2,
                 step: int = 10
                ):
        self.operatorcounts = operatorcounts
        self.product_count = product_count  # Define maximum number of products
        self.machine_count = machine_count
        # Kept under a different name: an instance attribute called `step`
        # would shadow the step() method and make the environment unusable.
        self.step_size = step

        self.observation_space = gym.spaces.Dict(
            {
                "stockraw": gym.spaces.Box(low=0, high=10, dtype=int), # In percent Raw products to order
                "stockint": gym.spaces.Box(low=0, high=10, dtype=int), # Semi finished products
                "stock2sell": gym.spaces.Box(low=0, high=10, dtype=int), # Product ready to sell
                "gains": gym.spaces.Box(low=0, high=2**63-2, dtype=int), # Money made selling products
                "expenses": gym.spaces.Box(low=0, high=2**63-2, dtype=int), # Money spent + losses
                "opremainingtodo" : gym.spaces.Box(low=0, high=1000, shape=(self.operatorcounts, )),
            }
        )

        self._make_simpy_env()

        # Define what actions are available to the agent
        self.action_space = gym.spaces.Dict({
            # Allows to propose an alternative campaign size remaining
            'current_batch': gym.spaces.MultiDiscrete([1001] * self.product_count),
            # Enforce the campaign
            'force_current_batch': gym.spaces.MultiBinary(self.product_count),
            # Next Campaign batch size
            'next_batch': gym.spaces.MultiDiscrete([1001] * self.product_count),
            # Ranking for which products are active, higher better
            'active_products': gym.spaces.Box(low=np.zeros(self.product_count),
                                             high=np.ones(self.product_count),
                                             dtype=np.float32),
            # Ranking for which machines are active, higher better
            'active_machines': gym.spaces.Box(low=np.zeros(self.machine_count),
                                             high=np.ones(self.machine_count),
                                             dtype=np.float32),
            'order_raw_prod': gym.spaces.Discrete(100)
        })

        self._reset_batches()

    def _reset_batches(self):
        """Zero the per-product batch bookkeeping arrays."""
        self.current_batch = np.zeros(self.product_count)
        self.force_current_batch = np.zeros(self.product_count)
        self.next_batch = np.zeros(self.product_count)

    def _make_simpy_env(self):
        """Create a fresh SimPy environment with its resources and stocks."""
        self.shopsim = simpy.Environment()
        self.operators = simpy.Resource(self.shopsim, capacity=self.operatorcounts)
        self.stockraw = simpy.Container(self.shopsim, capacity=50, init=10)
        self.stockint = simpy.FilterStore(self.shopsim, capacity=50)
        self.stocksell = simpy.FilterStore(self.shopsim, capacity=50)

        # Machines are kept both in a list (convenient for iteration) and as
        # machine_<n> attributes (the original access path).
        self.machines = []
        for machine_num in range(self.machine_count):
            machine = simpy.Resource(self.shopsim, capacity=1)
            self.machines.append(machine)
            setattr(self, f'machine_{machine_num}', machine)

        # Simulation time reached so far; step() advances it by step_size.
        self.now_sim = 0

    def _get_obs(self):
        """Return the current observation dictionary.

        Only the three stock levels are read from the simulation; gains,
        expenses and operator workload are placeholders for now.
        """
        # NOTE(review): values are plain scalars while the declared Box spaces
        # have shape (1,), and stockraw can reach 50 while its Box high is 10
        # - confirm the intended observation encoding.
        return {"stockraw": self.stockraw.level,
                "stockint": len(self.stockint.items),
                "stock2sell": len(self.stocksell.items),
                "gains": 0,
                "expenses" : 0,
                "opremainingtodo": np.zeros(self.operatorcounts)
               }

    def _get_info(self):
        """Return debugging information (placeholder counters, all zero)."""
        # Only for debugging purpose
        return {
            "prod1made": 0,
            "prod2made": 0,
            "prodLoss": 0,
            "prodsold": 0,
            "operatFree":0,
        }

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Start a new episode.

        Rebuilds the SimPy simulation and clears the batch bookkeeping.

        Returns:
            Tuple ``(observation, info)`` for the initial state.
        """
        super().reset(seed=seed)

        self._make_simpy_env()
        self._reset_batches()

        observation = self._get_obs()
        info = self._get_info()

        return observation, info

    def order_raw_product(self, qty):
        """SimPy process: deliver ``qty`` raw products after the lead time.

        ``qty`` must be strictly positive; SimPy rejects non-positive amounts.
        """
        # NOTE(review): the scenario text states a lead time of 3 UOT while
        # this waits 30 simulation time units - confirm which is intended.
        yield self.shopsim.timeout(30)
        yield self.stockraw.put(qty)

    def step(self, action):
        """Apply ``action`` and advance the simulation by one step.

        Args:
            action: Mapping sampled from ``action_space``. Only the keys
                ``force_current_batch``, ``current_batch`` and
                ``order_raw_prod`` are read here.

        Returns:
            Tuple ``(observation, reward, terminated, truncated, info)``.
        """
        # Must be initialised before the penalties below are accumulated.
        reward = 0

        # Does he want to enforce a new batch size:
        for i, (enforced, newbatch) in enumerate(zip(action["force_current_batch"],
                                                     action["current_batch"])):
            if enforced:
                # Update only the product concerned, not the whole array.
                self.current_batch[i] = newbatch
                reward -= 1

        # We sell the products that are available

        # We unfortunately get some products stolen

        # We set what should be done next

        # We order raw product (an order of 0 is a no-op: Container.put
        # raises ValueError for non-positive amounts)
        order_qty = int(action["order_raw_prod"])
        if order_qty > 0:
            self.shopsim.process(self.order_raw_product(order_qty))

        terminated = False
        truncated = False

        # Execute the Simpy
        self.now_sim += self.step_size
        self.shopsim.run(until=self.now_sim)

        observation = self._get_obs()
        info = self._get_info()

        return observation, reward, terminated, truncated, info

In [70]:
# Register ShopEnv under an id so it can be built with gym.make();
# max_episode_steps caps the episode length to prevent infinite episodes.
gym.register(id="TinyShop_env/Shop-v0", entry_point=ShopEnv, max_episode_steps=300)

In [71]:
# Build the shop environment through the id registered above.
env = gym.make("TinyShop_env/Shop-v0")

In [72]:
# Display the observation space declared by the environment.
env.observation_space

Dict('expenses': Box(0, 9223372036854775806, (1,), int64), 'gains': Box(0, 9223372036854775806, (1,), int64), 'opremainingtodo': Box(0.0, 1000.0, (2,), float32), 'stock2sell': Box(0, 10, (1,), int64), 'stockint': Box(0, 10, (1,), int64), 'stockraw': Box(0, 10, (1,), int64))

In [73]:
# Display the action space declared by the environment.
env.action_space

Dict('active_machines': Box(0.0, 1.0, (3,), float32), 'active_products': Box(0.0, 1.0, (2,), float32), 'current_batch': MultiDiscrete([1001 1001]), 'force_current_batch': MultiBinary(2), 'next_batch': MultiDiscrete([1001 1001]), 'order_raw_prod': Discrete(100))

In [74]:
# Start an episode; the call returns the initial (observation, info) pair.
env.reset()

({'stockraw': 10,
  'stockint': 0,
  'stock2sell': 0,
  'gains': 0,
  'expenses': 0,
  'opremainingtodo': array([0., 0.])},
 {'prod1made': 0,
  'prod2made': 0,
  'prodLoss': 0,
  'prodsold': 0,
  'operatFree': 0})