In [28]:
from __future__ import annotations
import sys
sys.path.append('..')  #let me import stuff from the path above

import numpy as np
import itertools
import matplotlib.pyplot as plt
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Generic, Callable, TypeVar, Iterable,\
Optional, Mapping, Tuple
from collections import defaultdict
from distribution import Distribution, Categorical, SampledDistribution, Constant, \
FiniteDistribution
from scipy.stats import poisson
from markov_decision_process import *

@dataclass(frozen=True)
class InventoryState:
    """Immutable (hashable) inventory state made of two integer counts."""

    # Units currently held.
    on_hand: int
    # Units that have been ordered.
    on_order: int

    def inventory_position(self) -> int:
        """Return the inventory position: ``on_hand`` plus ``on_order``."""
        held: int = self.on_hand
        incoming: int = self.on_order
        return held + incoming

# Type of the transition table built by
# SimpleInventoryMDPCap.get_action_transition_reward_map: for each state, for
# each order quantity, a Categorical over (next state, reward) pairs.
InvOrderMapping = Mapping[
    InventoryState,
    Mapping[int, Categorical[Tuple[InventoryState, float]]]
]

# The alias was originally defined under this misspelled name while the method
# annotation refers to ``InvOrderMapping``; keep the old spelling as an alias
# so any existing reference to it keeps working.
InvOrerMapping = InvOrderMapping

class SimpleInventoryMDPCap(FiniteMarkovDecisionProcess[InventoryState, int]):
    """Finite MDP for a single-item inventory with a capacity limit.

    States are ``InventoryState(on_hand, on_order)`` pairs whose inventory
    position (``on_hand + on_order``) does not exceed ``capacity``. Actions
    are integer order quantities that keep the inventory position within
    ``capacity``. Demand probabilities come from
    ``scipy.stats.poisson(poisson_lambda)``.
    """

    def __init__(
        self,
        capacity: int,
        poisson_lambda: float,
        holding_cost: float,
        stockout_cost: float
    ):
        """Store the model parameters and build the transition table.

        Args:
            capacity: Largest inventory position that is enumerated.
            poisson_lambda: Rate of the Poisson demand distribution.
            holding_cost: Cost multiplied by ``on_hand`` in every reward.
            stockout_cost: Cost multiplied by the stock-out term in the
                reward of the stock-out outcome.
        """
        self.capacity: int = capacity
        self.poisson_lambda: float = poisson_lambda
        self.holding_cost: float = holding_cost
        self.stockout_cost: float = stockout_cost
        self.poisson_distr = poisson(poisson_lambda)
        # The attributes above must be set first: the table builder reads them.
        super().__init__(self.get_action_transition_reward_map())

    def get_action_transition_reward_map(self) -> InvOrderMapping:
        """Build the state -> order -> outcome-distribution table.

        For every state with inventory position ``ip`` and every order
        quantity in ``0 .. capacity - ip``:

        * demand ``i < ip`` leads to ``InventoryState(ip - i, order)`` with
          reward ``-holding_cost * on_hand`` and probability ``pmf(i)``;
        * the remaining probability mass ``1 - cdf(ip - 1)`` leads to
          ``InventoryState(0, order)`` with the holding reward minus the
          stock-out cost term.

        Returns:
            A dict keyed by ``InventoryState`` whose values are dicts keyed by
            order quantity, each holding a ``Categorical`` over
            ``(next_state, reward)`` pairs.
        """
        d: dict[InventoryState, dict[int, Categorical[Tuple[InventoryState, float]]]] = {}

        for alpha in range(self.capacity + 1):
            for beta in range(self.capacity + 1 - alpha):
                state: InventoryState = InventoryState(alpha, beta)
                ip: int = state.inventory_position()
                base_reward: float = - self.holding_cost * alpha

                # None of the quantities below depend on the order size, so
                # they are computed once per state rather than once per order.
                demand_probs: list[float] = [
                    self.poisson_distr.pmf(i) for i in range(ip)
                ]
                stockout_prob: float = 1 - self.poisson_distr.cdf(ip - 1)
                # NOTE(review): for Poisson demand D the bracketed term equals
                # the *unconditional* expected shortfall E[max(D - ip, 0)],
                # yet it is attached to an outcome that is itself weighted by
                # stockout_prob. Confirm whether the conditional expectation
                # (this term divided by stockout_prob) was intended.
                stockout_reward: float = base_reward - self.stockout_cost * (
                    stockout_prob * (self.poisson_lambda - ip)
                    + ip * self.poisson_distr.pmf(ip)
                )

                d1: dict[int, Categorical[Tuple[InventoryState, float]]] = {}
                for order in range(self.capacity - ip + 1):
                    sr_probs_dict: dict[Tuple[InventoryState, float], float] = {
                        (InventoryState(ip - i, order), base_reward): prob
                        for i, prob in enumerate(demand_probs)
                    }
                    # Fixed: this previously called the undefined name
                    # ``WInventoryState`` and raised NameError.
                    sr_probs_dict[(InventoryState(0, order), stockout_reward)] = \
                        stockout_prob
                    d1[order] = Categorical(sr_probs_dict)
                d[state] = d1

        return d
        
# Parameters of the example inventory problem.
user_capacity = 2
user_poisson_lambda = 1.0
user_holding_cost = 1.0
user_stockout_cost = 10.0

# Finite MDP built from the parameters above.
si_mdp: FiniteMarkovDecisionProcess[InventoryState, int] = SimpleInventoryMDPCap(
    capacity=user_capacity,
    poisson_lambda=user_poisson_lambda,
    holding_cost=user_holding_cost,
    stockout_cost=user_stockout_cost,
)

# Deterministic policy: in every enumerated state, order the quantity that
# brings the inventory position up to user_capacity.
fdp: FiniteDeterministicPolicy[InventoryState, int] = FiniteDeterministicPolicy({
    InventoryState(on_hand, on_order): user_capacity - (on_hand + on_order)
    for on_hand in range(user_capacity + 1)
    for on_order in range(user_capacity + 1 - on_hand)
})

# Reward process obtained by applying the fixed policy to the MDP.
implied_mrp: FiniteMarkovRewardProcess[InventoryState] = si_mdp.apply_finite_policy(fdp)

In [29]:
# Import the local dynamic_programming module and reload it, so that the
# module is re-executed even if it was already imported in this kernel.
import dynamic_programming as dp
import importlib
importlib.reload(dp) 

# Evaluate the reward process implied by the fixed policy.
# NOTE(review): 0.9 is presumably the discount factor — confirm against
# dynamic_programming.evaluate_mrp_result.
dp.evaluate_mrp_result(implied_mrp,0.9)
# Leftover debugging expression (disabled): expected reward of one
# state/action entry of the MDP mapping.
# si_mdp.mapping[NonTerminal(InventoryState(0,1))][1].expectation(lambda x: x[1])

{NonTerminal(state=InventoryState(on_hand=0, on_order=0)): -10.0,
 NonTerminal(state=InventoryState(on_hand=0, on_order=1)): -3.927342083242136,
 NonTerminal(state=InventoryState(on_hand=0, on_order=2)): -3.8382469453207233,
 NonTerminal(state=InventoryState(on_hand=1, on_order=0)): -4.927342083242136,
 NonTerminal(state=InventoryState(on_hand=1, on_order=1)): -4.838246945320723,
 NonTerminal(state=InventoryState(on_hand=2, on_order=0)): -5.838246945320723}

In [30]:
# Run policy iteration on the inventory MDP; the recorded output is a pair of
# (value per state, action per state).
# NOTE(review): 0.9 is presumably the discount factor — confirm.
dp.policy_iteration_result(si_mdp,0.9)

({NonTerminal(state=InventoryState(on_hand=0, on_order=0)): -10.0,
  NonTerminal(state=InventoryState(on_hand=0, on_order=1)): -3.927342083242136,
  NonTerminal(state=InventoryState(on_hand=0, on_order=2)): -3.8382469453207233,
  NonTerminal(state=InventoryState(on_hand=1, on_order=0)): -4.927342083242136,
  NonTerminal(state=InventoryState(on_hand=1, on_order=1)): -4.838246945320723,
  NonTerminal(state=InventoryState(on_hand=2, on_order=0)): -5.838246945320723},
 For State InventoryState(on_hand=0, on_order=0): Do Action 0
 For State InventoryState(on_hand=0, on_order=1): Do Action 1
 For State InventoryState(on_hand=0, on_order=2): Do Action 0
 For State InventoryState(on_hand=1, on_order=0): Do Action 1
 For State InventoryState(on_hand=1, on_order=1): Do Action 0
 For State InventoryState(on_hand=2, on_order=0): Do Action 0)

In [31]:
# Run value iteration on the inventory MDP; the recorded output is a pair of
# (value per state, action per state).
# NOTE(review): 0.9 is presumably the discount factor — confirm.
dp.value_iteration_result(si_mdp,0.9)

({NonTerminal(state=InventoryState(on_hand=0, on_order=0)): -10.0,
  NonTerminal(state=InventoryState(on_hand=0, on_order=1)): -3.927342083242136,
  NonTerminal(state=InventoryState(on_hand=0, on_order=2)): -3.8382469453207237,
  NonTerminal(state=InventoryState(on_hand=1, on_order=0)): -4.927342083242136,
  NonTerminal(state=InventoryState(on_hand=1, on_order=1)): -4.838246945320723,
  NonTerminal(state=InventoryState(on_hand=2, on_order=0)): -5.838246945320723},
 For State InventoryState(on_hand=0, on_order=0): Do Action 0
 For State InventoryState(on_hand=0, on_order=1): Do Action 1
 For State InventoryState(on_hand=0, on_order=2): Do Action 0
 For State InventoryState(on_hand=1, on_order=0): Do Action 1
 For State InventoryState(on_hand=1, on_order=1): Do Action 0
 For State InventoryState(on_hand=2, on_order=0): Do Action 0)

In [21]:
# Import the local finite_horizon module and reload it, so that the module is
# re-executed even if it was already imported in this kernel.
import finite_horizon as fh
import importlib
importlib.reload(fh) 
# NOTE(review): the recorded output of this call is
# "AttributeError: 'InventoryState' object has no attribute 'state'".
# Presumably unwrap_finite_horizon_MDP expects an MDP whose states are wrapped
# objects exposing a `.state` attribute rather than the raw si_mdp — verify
# against finite_horizon before relying on this cell.
fh.unwrap_finite_horizon_MDP(si_mdp)

AttributeError: 'InventoryState' object has no attribute 'state'