## Bicycle Inventory - deterministic policies

In [None]:
import os
import sys

# Append the parent directory of the current working directory to the module
# search path (presumably so the `rl` package imported below can be resolved
# when the notebook runs from a subdirectory -- confirm against the repo layout).
sys.path.append(os.path.abspath(os.path.join(os.path.abspath("."), os.pardir)))

from dataclasses import dataclass
import numpy as np

from rl.distributions import SampledDistribution
from rl.markov_process import NonTerminal
from rl.policy import DeterministicPolicy, Policy

In [None]:
@dataclass(frozen=True)
class Inventory:
    """Immutable inventory state made of an on-hand and an on-order quantity."""

    on_hand: int
    on_order: int

    @property
    def total_inventory(self) -> int:
        """Return the on-hand quantity plus the on-order quantity."""
        on_hand, on_order = self.on_hand, self.on_order
        return on_hand + on_order


class InventoryDeterministicPolicy(DeterministicPolicy[Inventory, int]):
    """Deterministic policy whose action is the inventory shortfall.

    For a given ``Inventory`` state the action is
    ``reorder_point - total_inventory``, floored at zero.
    """

    def __init__(self, reorder_point: int) -> None:
        """Store ``reorder_point`` and hand the action rule to the base class."""
        self.reorder_point: int = reorder_point

        def action_for(state: Inventory) -> int:
            # `self.reorder_point` is looked up on every call, so the rule
            # follows the attribute's current value.
            shortfall = self.reorder_point - state.total_inventory
            return 0 if shortfall < 0 else shortfall

        super().__init__(action_for=action_for)


class InventoryStochasticPolicy(Policy[Inventory, int]):
    """Stochastic policy that draws its reorder point from a Poisson distribution.

    Each sampled action is ``sampled_reorder_point - total_inventory``,
    floored at zero, where the reorder point is drawn with mean
    ``reorder_point_poisson_mean``.
    """

    def __init__(self, reorder_point_poisson_mean: float):
        """Store the mean of the Poisson distribution used for the reorder point."""
        self.reorder_point_poisson_mean: float = reorder_point_poisson_mean

    def act(self, state: NonTerminal[Inventory]) -> SampledDistribution[int]:
        """Return a sampled distribution over actions for ``state``.

        A fresh reorder point is drawn every time the returned
        distribution's sampler is invoked.
        """

        def sample_order() -> int:
            # Model the reorder point as a Poisson random variable.
            reorder_point = np.random.poisson(self.reorder_point_poisson_mean)
            shortfall = reorder_point - state.state.total_inventory
            return 0 if shortfall < 0 else shortfall

        return SampledDistribution(sampler=sample_order)

In [3]:
# Build a deterministic inventory policy with a reorder point of 8.
deterministic_policy = InventoryDeterministicPolicy(reorder_point=8)