## Two-Stores Inventory Control

In [None]:
import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.path.abspath("."), os.pardir)))

import itertools
from dataclasses import dataclass, asdict

import pandas as pd
import seaborn as sns
import scipy.stats as ss
import matplotlib.pyplot as plt
import numpy as np

from rl.markov_decision_process import FiniteMarkovDecisionProcess
from rl.distributions import FiniteDistribution, Categorical, Constant
from rl.policy import FinitePolicy
from rl.dynamic_programming.policy_methods import policy_iteration_result

In [None]:
@dataclass(frozen=True)
class Store:
    """Immutable pair of integer counts describing one store.

    The two fields are ``on_hand`` and ``on_order``; ``total_inventory``
    exposes their sum.
    """

    on_hand: int
    on_order: int

    @property
    def total_inventory(self) -> int:
        """Return ``on_hand + on_order``."""
        hand, order = self.on_hand, self.on_order
        return hand + order


@dataclass(frozen=True)
class State:
    """Joint state of the two-store system: one ``Store`` record per store.

    Declared frozen, so instances are immutable and hashable; the MDP mapping
    in this file uses ``State`` objects as dictionary keys.
    """

    store1: Store  # inventory record for store 1
    store2: Store  # inventory record for store 2


@dataclass(frozen=True)
class StoreParams:
    """Immutable bundle of the per-store model parameters.

    Holds the store's capacity, its demand rate, and its holding and stockout
    cost figures. How the cost figures are applied is decided by the code that
    consumes this object.
    """

    capacity: int
    demand_rate: float
    holding_costs: float
    stockout_costs: float

    @property
    def demand_distribution(self) -> "ss.poisson":
        """Return a frozen SciPy Poisson distribution with mean ``demand_rate``.

        A new frozen distribution object is created on every access.
        """
        rate = self.demand_rate
        return ss.poisson(rate)


# An action is a 3-tuple of integers:
#   1st = how much to order for store 1 from the supplier (non-negative)
#   2nd = how much to order for store 2 from the supplier (non-negative)
#   3rd = how much to transfer from store 2 to store 1; this component is
#         signed: a negative value means that many units (its absolute
#         value) are transferred from store 1 to store 2 instead
Action = tuple[int, int, int]
# Finite distribution over (State, float) pairs.
# NOTE(review): presumably (next state, reward) -- confirm against the rl package.
SRDistribution = FiniteDistribution[tuple[State, float]]


class TwoStoresInventoryMDP(FiniteMarkovDecisionProcess[State, Action]):
    def __init__(self, store1_params: StoreParams, store2_params: StoreParams) -> None:
        """Record both stores' parameters, then initialise the finite MDP.

        The parameters are assigned before anything else because
        ``generate_mapping`` reads them from ``self``; the mapping it returns
        is passed to the parent constructor as ``mapping``.
        """
        self.store1_params, self.store2_params = store1_params, store2_params

        transition_table = self.generate_mapping()
        super().__init__(mapping=transition_table)

    def generate_mapping(
        self,
    ) -> dict[State, dict[Action, SRDistribution]]:
        mapping: dict[State, dict[Action, SRDistribution]] = dict()

        store1_pairs = (
            (a, b)
            for a in range(self.store1_params.capacity + 1)
            for b in range(self.store1_params.capacity + 1 - a)
        )

        store2_pairs = (
            (a, b)
            for a in range(self.store2_params.capacity + 1)
            for b in range(self.store2_params.capacity + 1 - a)
        )

        for (a1, b1), (a2, b2) in itertools.product(store1_pairs, store2_pairs):
            action_sr_mapping: dict[Action, SRDistribution] = dict()

            current_state = State(
                store1=Store(on_hand=a1, on_order=b1),
                store2=Store(on_hand=a2, on_order=b2),
            )

            inventory_gap1 = max(
                0,
                self.store1_params.capacity - current_state.store1.total_inventory,
            )

            inventory_gap2 = max(
                0,
                self.store2_params.capacity - current_state.store2.total_inventory,
            )

            for order1_capacity in range(inventory_gap1):
                for order2_capacity in range(inventory_gap2):
                    for transfer in range():
                        action = (
                            order1_capacity,
                            order2_capacity,
                            transfer,
                        )
