<a href="https://colab.research.google.com/github/nosadchiy/public/blob/main/ValueIteration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

# ----------------------------
# PARAMETERS
# ----------------------------
# Fixed ordering cost and per-unit cost
K = 20.0         # fixed ordering cost
c = 2.0          # unit ordering cost

# Holding cost and penalty (shortage) cost
h = 1.0          # holding cost per unit
p = 20.0         # penalty cost per unit (increased to push up inventory)

# Discount factor
beta = 0.95

# State space: inventory levels 0,1,...,X_max
X_max = 50
states = np.arange(X_max + 1)

# Action space: possible order quantities 0,1,...,Q_max
Q_max = 50
actions = np.arange(Q_max + 1)

# ----------------------------
# DEMAND DISTRIBUTION
# ----------------------------
# We use a demand distribution over {0,...,d_max} that puts more weight on higher demand values.
d_max = 10
# For example, these probabilities favor higher demand:
P_d = np.array([0.005, 0.005, 0.01, 0.03, 0.05, 0.1, 0.15, 0.2, 0.2, 0.15, 0.1])
# Normalize (should already sum to 1)
P_d = P_d / np.sum(P_d)
d_vals = np.arange(d_max + 1)

# ----------------------------
# VALUE ITERATION PARAMETERS
# ----------------------------
tol = 1e-5    # convergence tolerance
max_iter = 1000


# ----------------------------
# VALUE ITERATION ALGORITHM
# ----------------------------
def value_iteration(states, actions, d_vals, P_d, K, c, h, p, beta,
                    tol=1e-5, max_iter=1000):
    """Solve the discounted lost-sales inventory problem by value iteration.

    In each period an order of ``q`` units arrives before demand ``d`` is
    realised.  The period cost is the ordering cost (``K`` if ``q > 0`` plus
    ``c * q``) plus ``h`` per unit left over or ``p`` per unit short.
    Unmet demand is lost (the next state is 0) and inventory above the
    largest state is truncated to that state.

    Parameters
    ----------
    states : array_like of int
        Inventory levels; must be exactly ``0, 1, ..., x_max`` because the
        value function is indexed by inventory level.
    actions : array_like of int
        Candidate order quantities.
    d_vals : array_like of int
        Demand support.
    P_d : array_like of float
        Probability of each entry of ``d_vals`` (same length).
    K, c : float
        Fixed and per-unit ordering cost.
    h, p : float
        Per-unit holding and shortage cost.
    beta : float
        Discount factor.
    tol : float, optional
        Stop once the largest absolute change of the value function between
        two successive sweeps is below ``tol``.
    max_iter : int, optional
        Maximum number of sweeps.

    Returns
    -------
    V : numpy.ndarray
        Value function after the last sweep, one entry per state.
    policy : numpy.ndarray
        Cost-minimising order quantity for each state (first minimiser on
        ties, in the order given by ``actions``).
    n_iter : int
        Number of sweeps performed.
    converged : bool
        Whether the tolerance was reached within ``max_iter`` sweeps.

    Raises
    ------
    ValueError
        If ``states`` or ``actions`` is empty, if ``states`` is not
        ``0, 1, ..., x_max``, or if ``d_vals`` and ``P_d`` differ in length.
    """
    states = np.asarray(states)
    actions = np.asarray(actions)
    d_vals = np.asarray(d_vals)
    P_d = np.asarray(P_d, dtype=float)

    if states.size == 0 or actions.size == 0:
        raise ValueError("states and actions must be non-empty")
    if not np.array_equal(states, np.arange(len(states))):
        raise ValueError("states must be the inventory levels 0, 1, ..., x_max")
    if d_vals.shape != P_d.shape:
        raise ValueError("d_vals and P_d must have the same length")

    n_states = len(states)
    x_max = n_states - 1

    # Everything below is independent of V, so it is built once instead of
    # being recomputed in every sweep.  Axes are (state, action, demand).
    inv_after = (states[:, None, None]
                 + actions[None, :, None]
                 - d_vals[None, None, :])
    # Holding cost on leftover stock, penalty cost on unmet demand.
    stage_cost = np.where(inv_after >= 0, h * inv_after, -p * inv_after)
    # Lost sales: a shortage leads to state 0; stock above x_max is truncated.
    next_state = np.clip(inv_after, 0, x_max).astype(int)
    # Fixed cost is only paid when something is ordered.
    order_cost = np.where(actions > 0, K, 0.0) + c * actions
    # Expected one-period cost for every (state, action) pair.
    immediate = order_cost[None, :] + stage_cost @ P_d

    V = np.zeros(n_states)
    policy = np.zeros(n_states, dtype=int)
    rows = np.arange(n_states)

    for n_iter in range(1, max_iter + 1):
        # Bellman backup: Q[x, q] = immediate cost + beta * E[V(next state)].
        Q = immediate + beta * (V[next_state] @ P_d)
        best = Q.argmin(axis=1)
        V_new = Q[rows, best]
        policy = actions[best]
        delta = np.max(np.abs(V_new - V))
        V = V_new
        if delta < tol:
            return V, policy, n_iter, True

    return V, policy, max_iter, False


V, policy, n_iter, converged = value_iteration(
    states, actions, d_vals, P_d, K, c, h, p, beta,
    tol=tol, max_iter=max_iter,
)
if converged:
    print(f"Value iteration converged in {n_iter} iterations.")
else:
    print("Value iteration did not converge within the maximum number of iterations.")

# ----------------------------
# OUTPUT RESULTS
# ----------------------------
print("\nOptimal Policy (order quantity) for each inventory level:")
for x, q in enumerate(policy):
    print(f"Inventory level {x:2d} -> Order {q:2d} units")


Value iteration converged in 294 iterations.

Optimal Policy (order quantity) for each inventory level:
Inventory level  0 -> Order 18 units
Inventory level  1 -> Order 17 units
Inventory level  2 -> Order 16 units
Inventory level  3 -> Order 15 units
Inventory level  4 -> Order 14 units
Inventory level  5 -> Order 13 units
Inventory level  6 -> Order 12 units
Inventory level  7 -> Order  0 units
Inventory level  8 -> Order  0 units
Inventory level  9 -> Order  0 units
Inventory level 10 -> Order  0 units
Inventory level 11 -> Order  0 units
Inventory level 12 -> Order  0 units
Inventory level 13 -> Order  0 units
Inventory level 14 -> Order  0 units
Inventory level 15 -> Order  0 units
Inventory level 16 -> Order  0 units
Inventory level 17 -> Order  0 units
Inventory level 18 -> Order  0 units
Inventory level 19 -> Order  0 units
Inventory level 20 -> Order  0 units
Inventory level 21 -> Order  0 units
Inventory level 22 -> Order  0 units
Inventory level 23 -> Order  0 units
Inventor