In [1]:
import numpy as np
import math

class WholeSeller():
    """Upstream supplier that hands back each product's order after its lead time.

    Pending orders are kept in one circular buffer per product. The buffer for
    product ``i`` has ``lead_times[i]`` slots, and the slot used at any moment
    is ``time % lead_times[i]``. Every slot starts out holding ``lambd_a``.
    """

    def __init__(self, no_products, lead_times, life_cycles, lambd_a):
        """Create the supplier.

        Args:
            no_products: number of distinct products handled.
            lead_times: per-product lead time; also the length of each
                product's order buffer.
            life_cycles: per-product life cycle (stored, not used here).
            lambd_a: value every buffer slot is initialised with.
        """
        self.no_products = no_products
        self.lead_times = lead_times
        self.life_cycles = life_cycles
        self.lam = lambd_a

        self.order_history = self._initial_order_history()
        self.time = 0

    def _initial_order_history(self):
        """Build fresh per-product buffers with every slot set to ``self.lam``."""
        return [[self.lam for _ in range(self.lead_times[i])]
                for i in range(self.no_products)]

    def orderIn(self, order_qty):
        """Record an order in the current time slot of every product.

        Args:
            order_qty: either a single quantity (applied to every product) or
                a sequence/array holding one quantity per product.

        Raises:
            ValueError: if ``order_qty`` holds neither one value nor exactly
                ``no_products`` values.
        """
        quantities = np.ravel(order_qty).tolist()
        if len(quantities) == 1:
            # A lone value is broadcast so scalar callers keep working.
            quantities = quantities * self.no_products
        elif len(quantities) != self.no_products:
            raise ValueError(
                'Expected 1 or {} order quantities, got {}'.format(self.no_products, len(quantities)))

        # Each product receives its own quantity, not the whole action vector.
        for i in range(self.no_products):
            self.order_history[i][self.time % self.lead_times[i]] = quantities[i]

    def step(self):
        """Advance the supplier's clock by one period."""
        self.time += 1

    def reset(self):
        """Return to time zero with every buffer slot back at ``self.lam``."""
        self.time = 0
        self.order_history = self._initial_order_history()

    def deliver(self, time):
        """Return and clear the quantity stored in each product's current slot.

        Args:
            time: the caller's clock; must equal the supplier's own clock.

        Returns:
            A list with one delivered quantity per product.

        Raises:
            Exception: if ``time`` differs from the supplier's clock.
        """
        if time != self.time:
            raise Exception('Time of Wholeseller is not the same as of retailer : {} != {}'.format(self.time , time))

        retval = []
        for i in range(self.no_products):
            slot = self.time % self.lead_times[i]
            retval.append(self.order_history[i][slot])
            # The slot is emptied so the same order is never delivered twice.
            self.order_history[i][slot] = 0

        return retval

In [None]:
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import copy
import numpy as np
import tensorflow as tf


class SupplyChainv0(gym.Env):
    """Perishable-inventory environment for a retailer ordering from a wholesaler.

    Each step the retailer receives the wholesaler's delivery, serves the
    period's Poisson demand oldest-stock-first, discards stock that expires
    today, and then places the order given by ``action``.

    Key state:
        QuantityCurr[i]      stock on hand of product ``i``.
        productLifeDB[i][j]  circular buffer of stock by expiry slot; the slot
                             ``time % LifeShelf[i]`` expires today.
        expiredProducts[i]   quantity discarded in the latest step.
        shortageInDemand[i]  unmet demand in the latest step.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, products=1, low_val=0, high_val=10000, lam=3000, ep_len=1000):
        """Build the environment.

        Args:
            products: number of products tracked.
            low_val: accepted for interface compatibility; not used.
            high_val: size of each discrete action/observation dimension.
            lam: Poisson rate of the per-step demand; also passed to the
                wholesaler as its initial buffer value.
            ep_len: number of demand samples drawn per episode.
        """
        super(SupplyChainv0, self).__init__()
        N_DISCRETE_ACTIONS = high_val
        self.products = products
        self.action_space = spaces.MultiDiscrete(nvec=[N_DISCRETE_ACTIONS for _ in range(products)])
        self.observation_space = spaces.MultiDiscrete(nvec=[N_DISCRETE_ACTIONS for _ in range(products)])

        self.QuantityInit = [0 for _ in range(products)]
        self.ep_len = ep_len
        self.lam = lam

        # TODO (from the original notes): G, F, LifeCycle and LeadTime could be
        # randomised (e.g. multiplied by random numbers in [1, 5] and floored),
        # provided the seed is fixed and the numbers are generated first.

        ## G[i] is the spoilage cost of a product
        self.G = [3 for _ in range(products)]

        ## F[i] is the shortage cost of a product[i]
        self.F = [1 for _ in range(products)]

        ## LifeCycle[i] is the total time for which product is good
        self.LifeCycle = [3 for _ in range(products)]

        ## Leadtime[i] is the time it takes for the product to reach the retailer once the order is placed
        self.LeadTime = [1 for _ in range(products)]

        ## LifeShelf[i] is the time for which a new shipment of product is good before it needs to be thrown out
        self.LifeShelf = [LC - LT for LC, LT in zip(self.LifeCycle, self.LeadTime)]

        self.wholeSeller = WholeSeller(no_products=self.products, lead_times=self.LeadTime, life_cycles=self.LifeCycle,
                                       lambd_a=self.lam)
        self._start_episode()

    def _start_episode(self):
        """Initialise all per-episode state (shared by ``__init__`` and ``reset``)."""
        self.QuantityCurr = copy.deepcopy(self.QuantityInit)
        self.time = 0
        self.done = False
        self.orders = [np.random.poisson(size=self.ep_len, lam=self.lam) for _ in range(self.products)]

        ## productLifeDB[i][j] tells us how many (Quantity) of product[i] expire in slot j
        self.productLifeDB = [[0 for _ in range(shelf)] for shelf in self.LifeShelf]
        for i in range(self.products):
            # Initial stock is placed in the slot that expires last.
            self.productLifeDB[i][(self.time + self.LifeShelf[i] - 1) % (self.LifeShelf[i])] = self.QuantityCurr[i]

        self.spoilageRate = [0 for _ in range(self.products)]
        self.costOfStorage = [math.floor(self.lam * 1.2) for _ in range(self.products)]
        self.expiredProducts = [0 for _ in range(self.products)]
        self.shortageInDemand = [0 for _ in range(self.products)]

    def _get_spoilage(self):
        """Return, per product, a weighted waste ratio relative to current stock.

        The ``+ 1`` in the denominator avoids division by zero when the
        product is out of stock.
        """
        retval = []
        for i in range(self.products):
            retval.append((1.2 * self.expiredProducts[i] + 0.3 * self.shortageInDemand[i]) / (1.2*(self.QuantityCurr[i] + 1)))
        return retval

    def _get_cost_of_storage(self):
        """Return a snapshot of the stock on hand per product.

        A copy is returned so callers (e.g. the ``info`` dict) do not see the
        list change when later steps mutate ``QuantityCurr``.
        """
        return list(self.QuantityCurr)

    def _get_next_demand(self):
        """Draw one fresh Poisson demand sample per product."""
        retval = [np.random.poisson(size=1, lam=self.lam) for _ in range(self.products)]
        return retval

    def step(self, action):
        """Advance one period.

        Args:
            action: order quantities forwarded to the wholesaler.

        Returns:
            ``(observation, reward, done, info)`` where ``info`` carries the
            'Spoliage Rate' and 'Cost of Storage' values for this step.
        """
        # Receive today's delivery; new stock lands in the slot that expires last.
        delivery = self.wholeSeller.deliver(self.time)
        for i in range(self.products):
            shelf = self.LifeShelf[i]
            self.QuantityCurr[i] += delivery[i]
            self.productLifeDB[i][(self.time + shelf - 1) % shelf] += delivery[i]

        self.costOfStorage = self._get_cost_of_storage()

        for i in range(self.products):
            shelf = self.LifeShelf[i]
            today = self.time % shelf
            demand = self.orders[i][self.time]
            if self.QuantityCurr[i] - demand >= 0:
                # Demand met: consume stock starting with the slot expiring today.
                self.QuantityCurr[i] -= demand
                self.shortageInDemand[i] = 0
                consumed = 0
                for offset in range(shelf):
                    slot = (today + offset) % shelf
                    consumed += self.productLifeDB[i][slot]
                    if consumed >= demand:
                        # This slot covers the remainder; keep what is left over.
                        self.productLifeDB[i][slot] = consumed - demand
                        break
                    self.productLifeDB[i][slot] = 0

                # Whatever is still in today's slot has expired and is discarded.
                expired = self.productLifeDB[i][today]
                self.expiredProducts[i] = expired
                self.QuantityCurr[i] -= expired
                self.productLifeDB[i][today] = 0
            else:
                # Demand exceeds stock: everything is sold and the rest is a shortage.
                self.shortageInDemand[i] = demand - self.QuantityCurr[i]
                self.QuantityCurr[i] = 0
                for index in range(shelf):
                    self.productLifeDB[i][index] = 0
                self.expiredProducts[i] = 0

        self.spoilageRate = self._get_spoilage()
        self.wholeSeller.step()
        self.time += 1
        self.wholeSeller.orderIn(action)

        # Only ep_len demand samples exist (indices 0..ep_len-1), so the episode
        # must end once time reaches ep_len; otherwise the next step would
        # index past the end of self.orders.
        if self.time >= self.ep_len:
            self.done = True

        info = {'Spoliage Rate':self._get_spoilage(), 'Cost of Storage':self._get_cost_of_storage()}

        # return observation, reward, done, info
        return self._get_state(), self._get_reward(), self.done, info

    def reset(self):
        """Start a new episode and return the initial observation."""
        self._start_episode()
        self.wholeSeller.reset()
        return self._get_state()

    def _get_state(self):
        """Return the observation: stock on hand per product as an array."""
        return np.array(self.QuantityCurr)

    def _get_reward(self):
        """Return the negated spoilage-plus-shortage cost of the latest step.

        The cost is ``G[i] * expired + F[i] * shortage`` summed over products.
        It is negated because agents maximise reward, so a lower cost must
        yield a higher reward.
        """
        cost = 0.0
        for i in range(self.products):
            cost += self.G[i] * self.expiredProducts[i] + self.F[i] * self.shortageInDemand[i]
        return -cost

    def render(self, mode='human', close=False):
        """Rendering is not implemented."""
        pass


ModuleNotFoundError: No module named 'numpy.core._multiarray_umath'

SystemError: <class '_frozen_importlib._ModuleLockManager'> returned a result with an error set

ImportError: numpy.core._multiarray_umath failed to import

ImportError: numpy.core.umath failed to import

In [None]:
import gym

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv 
from stable_baselines.common import make_vec_env
from stable_baselines import A2C
from stable_baselines.common.callbacks import BaseCallback


# Worker count for the SubprocVecEnv variant that is currently commented out.
n_cpu = 4
# Vectorised wrapper holding a single SupplyChainv0 instance.
env = make_vec_env(SupplyChainv0 , n_envs=1)
#env = SubprocVecEnv([lambda: SupplyChainv0 for i in range(n_cpu)])
# A2C agent with an MLP policy; TensorBoard event files go under ./sac/.
model = A2C(policy=MlpPolicy, env=env, verbose=1, tensorboard_log="./sac/")


class TensorboardCallback(BaseCallback):
    """
    Custom callback for plotting additional values in tensorboard.

    On every step it logs the mean spoilage rate across all environments and
    products under the tag 'Spoilage_Rate'. On the first step it also
    registers a 'value_target' summary when the model exposes that tensor.
    """
    def __init__(self, verbose=0):
        # Tracks whether the one-off graph summary has been registered.
        self.is_tb_set = False
        super(TensorboardCallback, self).__init__(verbose)

    def _on_step(self) -> bool:
        """Log the extra scalars; always returns True so training continues."""
        # Log additional tensor
        if not self.is_tb_set:
            # NOTE(review): not every algorithm defines `value_target`; the
            # summary is only added when the attribute exists - confirm for A2C.
            value_target = getattr(self.model, 'value_target', None)
            if value_target is not None:
                with self.model.graph.as_default():
                    tf.summary.scalar('value_target', tf.reduce_mean(value_target))
                    self.model.summary = tf.summary.merge_all()
            self.is_tb_set = True

        # get_env() returns a vectorised env, so the underlying environments
        # are fetched via `unwrapped` before calling their private helper.
        base_envs = self.model.get_env().get_attr('unwrapped')
        rates = [rate for base_env in base_envs for rate in base_env._get_spoilage()]
        # simple_value must be a single number, so the per-product rates are averaged.
        value = float(np.mean(rates))

        writer = self.locals.get('writer')
        if writer is not None:
            summary = tf.Summary(value=[tf.Summary.Value(tag='Spoilage_Rate', simple_value=value)])
            writer.add_summary(summary, self.num_timesteps)
        return True


# Train, then persist the agent to disk.
model.learn(10000, callback=TensorboardCallback())
model.save("a2c_cartpole")

del model # remove to demonstrate saving and loading

# Pass the environment on load so the restored model can keep training later.
model = A2C.load("a2c_cartpole", env=env, tensorboard_log="/tmp/sac/", verbose=1)

# Roll the restored policy out for a bounded number of steps; an unbounded
# loop would keep this cell (and every cell after it) from ever finishing.
N_EVAL_STEPS = 1000
obs = env.reset()
for _ in range(N_EVAL_STEPS):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    #env.render()

In [None]:
from stable_baselines.common.env_checker import check_env

# Build a plain (non-vectorised) SupplyChainv0 instance for validation.
# NOTE(review): this rebinds the notebook-level name `env`, which an earlier
# cell assigned to the result of make_vec_env(...).
env = SupplyChainv0()
# It will check your custom environment and output additional warnings if needed
check_env(env)

In [None]:

# Disabled scratch code kept as a string literal; nothing in it is executed.
# NOTE(review): if re-enabled as written it would assign the class itself
# (`SupplyChainv0` without parentheses) to `env`, and it indexes `info` with
# integers although `step` returns a dict keyed by strings.
"""
env = SupplyChainv0
state = env.reset()
for i in range(1000):
    state, reward, done, info = env.step(env.action_space.sample())
    print(info[0] , info[1])
"""

In [None]:



# Continue training the current `model` for another 50,000 timesteps,
# logging the extra TensorBoard scalars through a fresh callback instance.
model.learn(total_timesteps=50000, callback=TensorboardCallback())