# 

In [3]:
from typing import Optional
import numpy as np

import gymnasium as gym
import simpy
from collections import namedtuple

# Scenario

Guidelines how the shop will behave:
* The shop is tiny, only 2 operators
* 2 machines making 2 different products
* Making 2 different products:
  * Prod 1:
    * Mono step, only need machine 1
    * Cycle time = 3
    * Sell Value = 2
  * Prod 2:
    * Multi step, Machine 1 + 2 in that order 
    * Cycle time = 10 (M1) + 15 (M2)
    * Sell Value = 20
* Storages:
  * All storage have an inside capacity
  * outside is subject to lost products at night (loss define by 10% per unit per cycle time)
* Raw product to be ordered
  * Lead time = 3 UOT per order
  * Initial Stock = 10
  * Purchase price = 1
* Selling / profit
  * Everything that is available to be sold will be sold every 6 unit steps

In [5]:
Action = namedtuple('Action', [
    'current_batch',
    'force_current_batch', 
    'next_batch',
    'ranking_next',
    'order_raw_prod'
])

In [885]:
import gymnasium as gym

class ShopEnv(gym.Env):
    def __init__(self,
                 operatorcounts: int = 2,
                 machine_count: int = 2,
                 product_count: int = 2,
                 step: int = 1 #Hour
                ):
        self.operatorcounts = operatorcounts
        self.product_count = product_count
        self.machine_count = machine_count
        
        self.prod_assignment = np.zeros((self.product_count, self.machine_count))
        # Prod A only needs machine 1 for 3 unit of time (in minutes)
        self.prod_assignment[0, 0] = 3
        self.prod_assignment[0, 1] = 0
        # Prod B needs machine 1 for 10 unit of time, and 15 for machine 2 (in minutes)
        self.prod_assignment[1, 0] = 10
        self.prod_assignment[1, 1] = 15

        self.prod_dict = {'0': {'Name': 'A', 'Cost': 3},
                          '1': {'Name': 'B', 'Cost': 20},
                          }

        # Where products need to go
        # Intermediary stock
        self.to_stock_prod = np.zeros((self.product_count, self.machine_count))
        self.to_stock_prod[0,0] = 0
        self.to_stock_prod[0,1] = 0
        # Prod B needs machine 1 for 10 unit of time, and 15 for machine 2 (in minutes)
        self.to_stock_prod[1,0] = 1
        self.to_stock_prod[1,1] = 0

        # Selling Stock
        self.to_sell_prod = np.zeros((self.product_count, self.machine_count))
        self.to_sell_prod[0,0] = 1
        self.to_sell_prod[0,1] = 0
        # Prod B needs machine 1 for 10 unit of time, and 15 for machine 2 (in minutes)
        self.to_sell_prod[1,0] = 0
        self.to_sell_prod[1,1] = 1
        
        self.pending_raw = 0
        
        self.observation_space = gym.spaces.Dict(
            {

                # Stocks 
                "stockraw_used": gym.spaces.Box(low=0, high=10, dtype=int),
                "stockraw_free": gym.spaces.Box(low=0, high=10, dtype=int),
                "stockint_used": gym.spaces.Box(low=0, high=20, dtype=int),
                "stockint_free": gym.spaces.Box(low=0, high=20, dtype=int),
                "stock2sell_used": gym.spaces.Box(low=0, high=10, dtype=int),
                "stock2sell_free": gym.spaces.Box(low=0, high=10, dtype=int),

                # What is in progress and how many to do next
                'current_batch_remaining': gym.spaces.MultiDiscrete(np.ones((self.product_count, self.machine_count))*1000),
                'next_batch': gym.spaces.MultiDiscrete(np.ones((self.product_count, self.machine_count))*1000),
                'ranking_next': gym.spaces.Box(low=np.zeros((self.product_count, self.machine_count)),
                                               high=np.ones((self.product_count, self.machine_count)), dtype=np.float32),

                # How does the staffing look like
                "count_operators_busy" : gym.spaces.Discrete(self.operatorcounts, start=0),
                "count_operators_free" : gym.spaces.Discrete(self.operatorcounts, start=0),

                # Where is made what with the cycle time
                "prod_assignment": gym.spaces.Box(low=0, high=500, shape=(self.product_count, self.machine_count), dtype=np.float32),

                # Pending Reception
                "pending_reception": gym.spaces.Discrete(100, start=0),

                # Time of day
                "timeday": gym.spaces.Discrete(24, start=0)
            }
        )

        self._make_simpy_env()
        
        self.action_space = gym.spaces.Dict({
            # Allows to propose an alternative campaign size remaining
            'current_batch': gym.spaces.MultiDiscrete(np.ones((self.product_count, self.machine_count))*1000),
            # Enforce the campaign
            'force_current_batch': gym.spaces.MultiBinary(self.product_count),
            # Next Campaign batch size
            'next_batch': gym.spaces.MultiDiscrete(np.ones((self.product_count, self.machine_count))*1000),
            # Ranking for which products are active, higher better
            'ranking_next': gym.spaces.Box(low=np.zeros((self.product_count, self.machine_count)),
                                           high=np.ones((self.product_count, self.machine_count)), dtype=np.float32),
            # Do we need to order anything
            'order_raw_prod': gym.spaces.Discrete(100)
        })

        self.current_batch = np.zeros((self.product_count, self.machine_count))
        self.force_current_batch = np.zeros((self.product_count, self.machine_count))
        self.next_batch = np.zeros((self.product_count, self.machine_count))
        self.ranking_next = np.zeros((self.product_count, self.machine_count))

        # Unit of time for each step
        self.step_size = step # Decisions are made every hour
        self.episode_end = 7 * 24 # End after 7 days
        self.salesrewards = 0

        self.prod_log = np.zeros((self.product_count, self.machine_count))
        self.sell_log = np.zeros(self.product_count)

    def _make_simpy_env(self):
        self.shopsim = simpy.Environment()
        
        self.operators = simpy.Resource(self.shopsim, capacity=self.operatorcounts)
        
        self.stockraw = simpy.Container(self.shopsim, capacity=100, init=10)
        
        self.stockint = simpy.FilterStore(self.shopsim, capacity=50)
        self.stocksell = simpy.FilterStore(self.shopsim, capacity=50)

        for machine_num in range(self.machine_count):
            setattr(self, f'machine_{machine_num}', simpy.Resource(self.shopsim, capacity=1))  

        self.now_sim = 0

        self.shopsim.process(self.steal_product_at_night())
        self.shopsim.process(self.sell_products())
        self.shopsim.process(self.get_operators_to_work())
    
    
    def _get_obs(self):
        return {"stockraw_used": self.stockraw.level,
                "stockraw_free": self.stockraw.capacity - self.stockraw.level,
                "stockint_used": len(self.stockint.items),
                "stockint_free": self.stockint.capacity - len(self.stockint.items),
                "stock2sell_used": len(self.stocksell.items),
                "stock2sell_free": self.stocksell.capacity - len(self.stocksell.items),
                
                "current_batch_remaining": self.current_batch,
                "next_batch" : self.next_batch,
                "ranking_next": self.ranking_next,
                "count_operators_busy": self.operators.count,
                "count_operators_free": self.operators.capacity-self.operators.count,
                "prod_assignment": self.prod_assignment,
                "pending_reception": self.pending_raw,

                "timeday": self.shopsim.now
                
               }

    def _get_info(self):
        # Only for debugging purpose
        return {
            "production_log": self.prod_log,
            "sell_log": self.sell_log
        }

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        super().reset(seed=seed)
        self._make_simpy_env()
        observation = self._get_obs()
        info = self._get_info()
        return observation, info

    def order_raw_product(self, qty):
        self.pending_raw += qty
        yield self.shopsim.timeout(3)
        roominstock = min(self.stockraw.capacity - self.stockraw.level, qty)
        if (roominstock>0) and (roominstock<=self.stockraw.capacity):
            yield self.stockraw.put(roominstock)
        self.pending_raw -= qty

    def steal_product_at_night(self, threshold_stock=10, day_hour=6, probability=.1):
        # at night for 12 hours each product not in the shop is subject to be stollen at a 10 % chance per cycle time
        while True:
            if self.shopsim.now % day_hour > day_hour/2:
                print('We are in Night mode, get your products in !!')
                # Filter Stores
                for stock in (self.stockint, self.stocksell):
                    if len(stock.items) > threshold_stock:
                        at_risk = (np.random.random(len(stock.item)-threshold_stock) < probability).astype(int)
                        for product_index in at_risk:
                            if product_index == 1:
                                yield stock.get()
                                print('product Stollen from Store')
                # Containers 
                for stock in (self.stockraw, ):
                    if stock.level > threshold_stock:
                        at_risk = (np.random.random(stock.level-threshold_stock) < probability).astype(int)
                        for product_index in at_risk:
                            if product_index == 1:
                                yield stock.get(1)
                                print('product Stollen fron Container')
            else:
                print('We are in Day mode')
            yield self.shopsim.timeout(self.step_size)

    def make_products(self, machine_j=0, prod_i=0, patience = 15):
        prod_made = 0
        cycletime = self.prod_assignment[prod_i, machine_j] #Minutes
        machine = getattr(self, f'machine_{machine_j}')

        # That needs to be improved / automated
        # if prod_i == 0:
        stockin = self.stockraw
        filterstore = False
        if prod_i == 1 and machine_j == 1:
            stockin = self.stockint
            filterstore = True
        
        if self.to_stock_prod[prod_i, machine_j] == 1:
            stockout = self.stockint
        if self.to_sell_prod[prod_i, machine_j] == 1:
            stockout = self.stocksell
        
        with self.operators.request() as op_req, machine.request() as machine_req:
            # Wait for both resources to be available
            yield op_req
            yield machine_req
            while prod_made < self.current_batch[prod_i, machine_j]:
                if filterstore:
                    # print(stockin.items , machine_j)
                    event = stockin.get(lambda x: x==str(prod_i))
                else:
                    event = stockin.get(1)

                timeout_event = self.shopsim.timeout(patience/60)

                result = yield event | timeout_event

                if event in result:
                    prod_made += 1
                    self.prod_log[prod_i, machine_j] +=1
                    yield stockout.put(str(prod_i))
                    print(f'Just made a product {self.prod_dict.get(str(prod_i))['Name']} - Machine {machine_j}')
                    yield self.shopsim.timeout(cycletime/60)
                else:
                    print(f'Operator Gave up after {patience} minutes weight at {self.shopsim.now} - Machine {machine_j}')
                    break
        if prod_made == self.current_batch[prod_i, machine_j]:          
            self.current_batch[prod_i, machine_j] = self.next_batch[prod_i, machine_j]
            self.current_batch[prod_i, machine_j] = 0

    def get_operators_to_work(self):
        while True:
            if self.current_batch.sum() == 0:
                # If no current task, pass next to current
                for i, j in np.ndindex(action.next_batch.shape):
                    if self.current_batch[i, j] == 0:
                        self.current_batch[i, j] = action.next_batch[i, j]
                        self.next_batch[i, j] = 0
            if self.operators.count <= self.operators.capacity:
                max_idx = np.argmax(self.ranking_next)
                i, j = np.unravel_index(max_idx, self.ranking_next.shape)
                self.ranking_next[i, j] = 0
                # print("HERE", i,j)
                machine = getattr(self, f'machine_{j}')
                if (self.current_batch[i, j] > 0) and (machine.count==0) and (self.prod_assignment[i, j] != 0):
                    self.shopsim.process(self.make_products(prod_i=i, machine_j=j))
                
            # Can try an else: here
            yield self.shopsim.timeout(self.step_size/60)

    def sell_products(self, freq=1):
        while True:
            # print(self.stocksell.items)
            if self.shopsim.now % freq == 0:
                for prod in self.stocksell.items:
                    sold = yield self.stocksell.get(lambda x: x==prod)
                    # print(prod, type(prod))
                    print(f'Congrats, you just sold a product {self.prod_dict.get(prod)['Name']} - Value {self.prod_dict.get(prod)['Cost']}')
                    self.sell_log[int(prod)] +=1
                    self.salesrewards += self.prod_dict.get(prod)['Cost']
            yield self.shopsim.timeout(self.step_size)

    def step(self, action):
        # Does he want to enforce a new batch size:
        reward = 0
        truncated = False
        terminated = True if self.shopsim.now == 7*24 else False
        for i, j in np.ndindex(action.force_current_batch.shape):
            if action.force_current_batch[i,j]==1:
                self.current_batch[i,j] = action.current_batch[i,j]
                reward -= 1

        # We set what should be done next and with what urgency
        for i, j in np.ndindex(action.next_batch.shape):
            if self.prod_assignment[i, j] > 0:
                self.next_batch[i, j] = action.next_batch[i, j]
                self.ranking_next[i, j] = action.ranking_next[i, j]

        # We order raw product
        if action.order_raw_prod > 0:
            self.shopsim.process(self.order_raw_product(action.order_raw_prod))
            reward = reward - action.order_raw_prod * 1

        # Execute the Simpy
        self.now_sim += self.step_size
        self.shopsim.run(until=self.now_sim)

        reward += self.salesrewards
        self.salesrewards = 0
        
        observation = self._get_obs()
        info = self._get_info()

        return observation, reward, terminated, truncated, info

In [886]:
gym.register(
    id="TinyShop_env/Shop-v0",
    entry_point=ShopEnv,
    max_episode_steps=7*24,  # Prevent infinite episodes
)

In [887]:
env = gym.make("TinyShop_env/Shop-v0")

In [888]:
env.observation_space

Dict('count_operators_busy': Discrete(2), 'count_operators_free': Discrete(2), 'current_batch_remaining': MultiDiscrete([[1000 1000]
 [1000 1000]]), 'next_batch': MultiDiscrete([[1000 1000]
 [1000 1000]]), 'pending_reception': Discrete(100), 'prod_assignment': Box(0.0, 500.0, (2, 2), float32), 'ranking_next': Box(0.0, 1.0, (2, 2), float32), 'stock2sell_free': Box(0, 10, (1,), int64), 'stock2sell_used': Box(0, 10, (1,), int64), 'stockint_free': Box(0, 20, (1,), int64), 'stockint_used': Box(0, 20, (1,), int64), 'stockraw_free': Box(0, 10, (1,), int64), 'stockraw_used': Box(0, 10, (1,), int64), 'timeday': Discrete(24))

In [889]:
env.action_space

Dict('current_batch': MultiDiscrete([[1000 1000]
 [1000 1000]]), 'force_current_batch': MultiBinary(2), 'next_batch': MultiDiscrete([[1000 1000]
 [1000 1000]]), 'order_raw_prod': Discrete(100), 'ranking_next': Box(0.0, 1.0, (2, 2), float32))

In [890]:
env.reset()

({'stockraw_used': 10,
  'stockraw_free': 90,
  'stockint_used': 0,
  'stockint_free': 50,
  'stock2sell_used': 0,
  'stock2sell_free': 50,
  'current_batch_remaining': array([[0., 0.],
         [0., 0.]]),
  'next_batch': array([[0., 0.],
         [0., 0.]]),
  'ranking_next': array([[0., 0.],
         [0., 0.]]),
  'count_operators_busy': 0,
  'count_operators_free': 2,
  'prod_assignment': array([[ 3.,  0.],
         [10., 15.]]),
  'pending_reception': 0,
  'timeday': 0},
 {'production_log': array([[0., 0.],
         [0., 0.]]),
  'sell_log': array([0., 0.])})

In [896]:
action = Action(
    current_batch=np.array([[5, 0],
                            [5, 5]]),
    force_current_batch=np.array([[0, 0],
                                  [0, 0]]),
    next_batch=np.array([[1, 0],
                         [5, 5]]),
    ranking_next=np.array([[0.1, 0],
                           [0.3, 0.01]]),
    order_raw_prod=0
)
# Take a step
observation, reward, terminated, truncated, info = env.step(action)
print("Reward:", reward)
print("New observation:", observation)

We are in Night mode, get your products in !!
Congrats, you just sold a product B - Value 20
Operator Gave up after 15 minutes weight at 5.266666666666654 - Machine 0
Operator Gave up after 15 minutes weight at 5.299999999999987 - Machine 1
Reward: 20
New observation: {'stockraw_used': 0, 'stockraw_free': 100, 'stockint_used': 0, 'stockint_free': 50, 'stock2sell_used': 0, 'stock2sell_free': 50, 'current_batch_remaining': array([[0., 0.],
       [5., 5.]]), 'next_batch': array([[1., 0.],
       [5., 5.]]), 'ranking_next': array([[0., 0.],
       [0., 0.]]), 'count_operators_busy': 0, 'count_operators_free': 2, 'prod_assignment': array([[ 3.,  0.],
       [10., 15.]]), 'pending_reception': 0, 'timeday': 6}


In [829]:
action

Action(current_batch=array([[5, 0],
       [5, 5]]), force_current_batch=array([[0, 0],
       [0, 0]]), next_batch=array([[1, 0],
       [5, 5]]), ranking_next=array([[0.1 , 0.  ],
       [0.3 , 0.01]]), order_raw_prod=10)

In [87]:
np.zeros((2,3)).argmax()

np.int64(0)

In [128]:
X = np.array([[0.3, 0.6],[0.1, 0.9]])
X#.argmax()

array([[0.3, 0.6],
       [0.1, 0.9]])

In [129]:
np.argmax(X, axis=0)

array([0, 1])

In [130]:
max_idx = np.argmax(X)
row, col = np.unravel_index(max_idx, X.shape)
print(f"Max at ({row}, {col}) = {X[row, col]}")  # Max at (1, 0) = 9

Max at (1, 1) = 0.9


In [131]:
max_idx

np.int64(3)

In [132]:
max_idx = np.argsort(X, axis=None)
max_idx

array([2, 0, 1, 3])

In [605]:
X.sum()

np.float64(1.9)

In [630]:
X[1,0]

np.float64(0.1)

In [672]:
X[0,]

array([0.3, 0.6])

In [673]:
X

array([[0.3, 0.6],
       [0.1, 0.9]])