In [38]:
import random
import numpy as np
# ---- Cost parameters (consumed by step) ----
# Fixed charge added whenever a replenishment order is placed.
order_cost_fixed = 75
# Charged per unit of stock remaining after the day's demand.
# NOTE(review): same value as order_cost_fixed -- confirm the intended
# per-unit carrying cost against the problem statement.
carrying_cost_per_unit = 75
# Charged per unit of demand that could not be served from stock.
lost_cost_per_unit = 18

# ---- Inventory dynamics ----
# Stock level every episode starts from.
initial_stock = 115
# Bounds handed to random.randint, so both ends are inclusive.
demand_range = (0, 99)
# Value the order timer is set to when an order is placed.
lead_time = 3
# Not referenced by the code visible in this cell.
oneyear = 365

# ---- Policies to evaluate, as (P, Q) = (reorder point, order quantity) ----
policies = [
    (125, 150),
    (125, 250),
    (150, 250),
    (175, 250),
    (175, 300),
]

# no discounting here, gamma value assumed to be 1.
gamma = 1.0
def step(state, P, Q):
    """Simulate one day of inventory under a (P, Q) reorder policy.

    Order of events within the day:
      1. An outstanding order whose timer is 1 is received (stock grows
         by Q and the timer resets to 0); a longer timer is decremented.
      2. Demand is drawn uniformly from ``demand_range`` (inclusive).
      3. Demand is served from stock; any unmet demand is lost.
      4. Carrying cost is charged on the remaining stock and lost-sales
         cost on the unmet units.
      5. If no order is outstanding and the remaining stock is positive
         and at or below P, an order is placed: the timer is set to
         ``lead_time`` and the fixed ordering cost is charged.

    Args:
        state: ``(stock, order_timer)``; a timer of 0 means no order is
            outstanding.
        P: reorder point.
        Q: quantity added to stock when an order arrives.

    Returns:
        ``(next_state, total_cost, terminal)`` where ``next_state`` is the
        updated ``(stock, order_timer)``, ``total_cost`` is the cost
        incurred during this day, and ``terminal`` is True when the stock
        is 0 after demand.
    """
    on_hand, countdown = state

    # Advance the outstanding order (a timer of 0 means there is none).
    if countdown > 1:
        countdown -= 1
    elif countdown == 1:
        on_hand += Q
        countdown = 0

    requested = random.randint(*demand_range)

    # Serve what we can; whatever exceeds the stock is lost.
    shortfall = max(requested - on_hand, 0)
    on_hand = max(on_hand - requested, 0)

    # No order is placed once the stock has hit zero: that day ends the
    # episode instead.
    place_order = countdown == 0 and 0 < on_hand <= P
    if place_order:
        countdown = lead_time

    day_cost = (
        on_hand * carrying_cost_per_unit
        + shortfall * lost_cost_per_unit
        + (order_cost_fixed if place_order else 0)
    )
    return (on_hand, countdown), day_cost, on_hand == 0
def td_policy_evaluation(P, Q, num_episodes=10000, alpha=0.1):
    """Estimate state values for a fixed (P, Q) policy with tabular TD(0).

    Each episode starts from ``(initial_stock, 0)`` and is advanced with
    ``step`` until it reports a terminal transition. After every
    transition the value of the state just left is moved towards
    ``cost + gamma * value(next_state)`` by a fraction ``alpha``.

    Args:
        P: reorder point passed through to ``step``.
        Q: order quantity passed through to ``step``.
        num_episodes: number of episodes to simulate.
        alpha: step size of the TD update (assumed to be 0.1).

    Returns:
        Dict mapping each updated ``(stock, order_timer)`` state to its
        estimated accumulated cost until the episode ends. States that
        were never updated are absent and are treated as 0.0 while
        learning.
    """
    values = {}
    for _ in range(num_episodes):
        current = (initial_stock, 0)
        finished = False
        while not finished:
            successor, cost, finished = step(current, P, Q)
            previous = values.get(current, 0.0)
            # Terminal successors are never written to the table, so they
            # contribute the default 0.0 to the bootstrap target.
            target = cost + gamma * values.get(successor, 0.0)
            values[current] = previous + alpha * (target - previous)
            current = successor
    return values
# Evaluate every candidate policy and keep the learned value of the
# starting state, i.e. the estimated cost of one cycle from the initial
# stock down to zero stock.
results = {}
for P, Q in policies:
    V = td_policy_evaluation(P, Q)
    # Fall back to 0.0 if the start state was never updated.
    initial_value = V.get((initial_stock, 0), 0.0)
    results[P, Q] = initial_value

# Report: two header lines followed by one line per policy.
print(
    "TD(0) Evaluation of Policies for One Cycle (Initial State to Zero Stock):",
    "Policy || Estimated Costs Incurred for One Cycle",
    sep="\n",
)
for (P, Q), value in results.items():
    print(f"P={P} and Q={Q} | {value:.2f}")

TD(0) Evaluation of Policies for One Cycle (Initial State to Zero Stock):
Policy || Estimated Costs Incurred for One Cycle
P=125 and Q=150 | 13862.65
P=125 and Q=250 | 21615.25
P=150 and Q=250 | 29022.54
P=175 and Q=250 | 41127.75
P=175 and Q=300 | 55218.52
