In [50]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
from state_space_setup import *

In [51]:
def transition_probabilities(old_state, new_state, action, lambda_=2, p=0.9):
    """
    Input: old_state (np.array): The previous state of the system. Index 0 is read as the
                                 parts in inventory, index 1 as the parts on outstanding order.
           new_state (np.array): The potential next state of the system (same layout).
           action (int): The action taken, representing the number of parts ordered.
           lambda_ (float): Average number of parts breaking down per week (default is 2).
           p (float): Probability of receiving the outstanding order (default is 0.9).
    Output: prob (float): The probability of transitioning from old_state to new_state given the action.

    This function calculates the transition probability from one state to another given an action.
    It is important to note that this function assumes the old_state and action are valid to provide
    more general applicability. The transition probabilities are based on three assumptions:
    1. Parts break down as a Poisson process with parameter lambda_; the number of breakdowns
       is capped at the number of parts in inventory, so all outcomes with at least that many
       breakdowns are lumped together.
    2. The probability of the outstanding parts being delivered is fixed at p for any order size.
    3. The newly ordered parts are added to the current outstanding order immediately.
    """
    in_stock = old_state[0]
    # parts that are on order once the new order has been added to the outstanding one
    pending = old_state[1] + action

    if new_state[1] == 0:
        # Nothing is outstanding afterwards: either there was nothing to deliver
        # (certain event) or the pending order arrived (probability p).
        if (old_state[1] == 0) and (action == 0):
            prob = 1
        else:
            prob = p

        if new_state[0] <= (in_stock + pending) and new_state[0] > pending:
            # some of the old inventory survived: exact number of breakdowns
            prob *= stats.poisson.pmf(in_stock + pending - new_state[0], mu=lambda_)
        elif new_state[0] == pending:
            # only the delivered parts remain: at least in_stock breakdowns happened
            prob *= stats.poisson.sf(in_stock - 1, mu=lambda_)
        else:
            prob = 0
    elif new_state[1] == pending:
        # The pending order was not delivered (probability 1 - p).
        prob = 1 - p
        if new_state[0] <= in_stock and new_state[0] > 0:
            # some of the old inventory survived: exact number of breakdowns
            prob *= stats.poisson.pmf(in_stock - new_state[0], mu=lambda_)
        elif new_state[0] == 0:
            # inventory ran out: at least in_stock breakdowns happened
            prob *= stats.poisson.sf(in_stock - 1, mu=lambda_)
        else:
            prob = 0
    else:
        # the outstanding order can only be fully delivered or stay fully pending
        prob = 0
    return prob

In [52]:
maximum_parts = 41

state_space = get_state_space(maximum_parts)

# Transition probability table indexed as [old state, new state, order size].
n_states = len(state_space)
n_actions = maximum_parts + 1
trans_prob_matrix = np.zeros((n_states, n_states, n_actions))

# Fill the table one (old state, new state) pair at a time, covering every
# order size from 0 up to maximum_parts for that pair.
for row, s_old in enumerate(state_space):
    for col, s_new in enumerate(state_space):
        trans_prob_matrix[row, col, :] = [
            transition_probabilities(s_old, s_new, order_size)
            for order_size in range(n_actions)
        ]

In [53]:
def downtime_cost(num_parts, k=10080, lambda_=2, cost_per_week=16800):
    """
    Input: num_parts (int): The number of parts currently in the inventory.
           k (int): The number of periods the week is split into (default is 10080,
                    i.e. 7 * 24 * 60 one-minute periods).
           lambda_ (float): The average number of parts breaking down per week (default is 2).
           cost_per_week (float): The cost incurred if the machine is not running for
                                  an entire week (default is 16800).
    Output: cost (float): The cost incurred if shortage occurs in the next period for the
                          given initial inventory level.

    This function calculates the shortage cost for a given state if shortage occurs in
    the next period. Based on the model assumption - parts breaking as a Poisson process -
    the shortage cost is calculated so that we get the same expected shortage cost as if we
    assumed we split the week into k periods and penalise according to the number of periods the
    machine was out of order.

    With no parts in inventory the full weekly cost is returned. If the probability of
    running out within the week underflows to zero, 0.0 is returned so that multiplying
    by that (zero) probability yields 0 rather than NaN.
    """
    if num_parts == 0:
        return cost_per_week

    # probability of running out of parts by next week; the result is conditioned on
    # this event so that multiplying by this probability later gives the expected cost
    normalisation = stats.poisson.sf(num_parts - 1, lambda_)
    if normalisation == 0:
        # shortage is numerically impossible: avoid the 0 / 0 division below
        return 0.0

    # vector of numbers of periods of downtime, from 1 to k
    m = np.arange(1, k + 1)
    # vector of numbers of parts broken before the period in which the last part breaks
    i = np.arange(num_parts)
    # column vector of expected number of parts breaking down over periods 1 to k - m for each m
    lambda_m = (lambda_ / k) * (k - m[:, None])

    # matrix (one row per m, one column per i) of probabilities for each number of parts
    # breaking down before the period in which the last part breaks
    pmf = stats.poisson.pmf(i[None, :], lambda_m)
    # vector of probabilities of all the remaining parts breaking down in the next period
    # for each possible number of parts broken before the period in which the last part breaks
    sf = stats.poisson.sf(num_parts - i - 1, lambda_ / k)

    # vector of probabilities of exactly m periods of downtime calculated using the above
    prob = np.sum(pmf * sf, axis=1)
    # weighted cost for each period of downtime, where m/k is the fraction of the week
    weighted_cost = prob * (m / k) * cost_per_week
    # total expected cost as the sum of weighted costs
    total_cost = np.sum(weighted_cost)

    return total_cost / normalisation


# Look-up table: entry n holds downtime_cost(n) for n = 0, ..., maximum_parts.
downtime_vector = np.array(list(map(downtime_cost, np.arange(maximum_parts + 1))))


def cost_function(old_state, new_state, action, ordering_cost=200):
    """
    Input: old_state (np.array): The previous state of the system.
           new_state (np.array): The potential next state of the system.
           action (int): The action taken, representing the number of parts ordered.
           ordering_cost (float): The fixed cost charged whenever an order is placed (default is 200).
    Output: cost (float): The sum of the holding, ordering and shortage costs for this transition.

    The ordering cost is only charged for a non-zero order, the holding cost is charged on both
    entries of the new state, and the shortage cost is read from the module-level
    downtime_vector at the old inventory level whenever the transition implies a shortage.
    """
    holding_rate = 0.008  # share of a part's price paid for holding it
    unit_price = 100  # price per part bought

    # an order costs the fixed fee plus the price of the parts; no order costs nothing
    order_cost = ordering_cost + unit_price * action if action != 0 else 0

    on_hand, on_order = new_state[0], new_state[1]

    # a shortage is charged if the new inventory consists only of the newly arrived
    # parts, or if the new inventory is empty
    only_arrivals_left = on_order == 0 and on_hand == (old_state[1] + action)
    if only_arrivals_left or on_hand == 0:
        shortage_cost = downtime_vector[old_state[0]]
    else:
        shortage_cost = 0

    # holding cost is paid for both the delivered and the ordered parts
    holding_cost = holding_rate * unit_price * (on_hand + on_order)

    return holding_cost + order_cost + shortage_cost


def expected_action_value(current_state, action, values):
    """
    Input: current_state (tuple): The current state of the system.
           action (int): The action taken, representing the number of parts ordered.
           values (np.array): The value function for each state.
    Output: expected_value (float): The expected value of taking the action in the current state.

    The expectation is taken over all next states using the module-level trans_prob_matrix
    and cost_matrix: each next state contributes its one-step cost plus the discounted value.
    """
    gamma = 0.995  # discount factor applied to the next-state values

    # row of the current state in the transition and cost tables
    row = get_index(current_state[0], current_state[1], maximum_parts)

    # one-step cost plus discounted continuation value for every next state
    cost_to_go = cost_matrix[row, :, action] + gamma * values
    return np.dot(trans_prob_matrix[row, :, action], cost_to_go)

In [54]:
# Fixed ordering costs to sweep over: 0, 10, ..., 200
ordering_costs = np.arange(0, 201, 10)
# One row per ordering cost, holding the two policy parameters found for it
policies = np.zeros((ordering_costs.shape[0], 2))
state_tuples = [tuple(state) for state in state_space]

# Value iteration setup: start from an all-zero value function and a delta
# above the tolerance so that the first convergence check passes
error = 1e-3
delta = 2 * error
values = np.zeros(len(state_space))

In [55]:
def _best_order(state, state_values):
    # Order size with the smallest expected value among the actions allowed in `state`.
    options = get_action_space(state, maximum_parts)
    scores = np.array([
        expected_action_value(state, act, state_values)
        for act in options
    ])
    return options[np.argmin(scores)]


for k, cost in enumerate(ordering_costs):
    # Rebuild the cost table [old state, new state, order size] for this ordering cost.
    cost_matrix = np.zeros((len(state_space), len(state_space), maximum_parts + 1))
    for row, s_old in enumerate(state_space):
        for col, s_new in enumerate(state_space):
            cost_matrix[row, col, :] = [
                cost_function(s_old, s_new, order_size, ordering_cost=cost)
                for order_size in range(maximum_parts + 1)
            ]

    # Value iteration: every sweep is evaluated against a snapshot of the previous
    # values and repeated until the largest change drops below the tolerance.
    while delta >= error:
        previous_values = values.copy()
        largest_change = 0
        for idx, state in enumerate(state_space):
            candidate_costs = np.array([
                expected_action_value(state, act, previous_values)
                for act in get_action_space(state, maximum_parts)
            ])
            values[idx] = np.min(candidate_costs)
            largest_change = max(largest_change, abs(previous_values[idx] - values[idx]))
        delta = largest_change

    # Greedy policy with respect to the converged values.
    policy_records = [(state, _best_order(state, values)) for state in state_tuples]

    # IP is the sum of the state entries; New_IP adds the order size on top of it.
    policy_df = pd.DataFrame(policy_records, columns=["State", "Order_size"])
    policy_df["IP"] = policy_df["State"].apply(sum)
    policy_df["New_IP"] = policy_df["IP"] + policy_df["Order_size"]

    # Among the states in which an order is placed, s is the largest IP and
    # S is the largest New_IP.
    places_order = policy_df["Order_size"] != 0
    filtered_policy_df = policy_df[places_order]
    s = filtered_policy_df["IP"].max()
    S = filtered_policy_df["New_IP"].max()
    policies[k] = [s, S]
    print(f"Iteration {k + 1}/{len(ordering_costs)} completed. For delivery cost {cost} we get, s = {s}, S = {S}")

    # Reset the value-iteration state so the next ordering cost starts from scratch.
    values = np.zeros(len(state_space))
    delta = 2 * error



    

Iteration 1/21 completed. For delivery cost 0 we get, s = 15, S = 16
Iteration 2/21 completed. For delivery cost 10 we get, s = 13, S = 19
Iteration 3/21 completed. For delivery cost 20 we get, s = 12, S = 21
Iteration 4/21 completed. For delivery cost 30 we get, s = 12, S = 22
Iteration 5/21 completed. For delivery cost 40 we get, s = 12, S = 23
Iteration 6/21 completed. For delivery cost 50 we get, s = 12, S = 25
Iteration 7/21 completed. For delivery cost 60 we get, s = 12, S = 26
Iteration 8/21 completed. For delivery cost 70 we get, s = 12, S = 27
Iteration 9/21 completed. For delivery cost 80 we get, s = 12, S = 28
Iteration 10/21 completed. For delivery cost 90 we get, s = 12, S = 28
Iteration 11/21 completed. For delivery cost 100 we get, s = 12, S = 29
Iteration 12/21 completed. For delivery cost 110 we get, s = 11, S = 30
Iteration 13/21 completed. For delivery cost 120 we get, s = 11, S = 31
Iteration 14/21 completed. For delivery cost 130 we get, s = 11, S = 32
Iteration 15