In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import differential_evolution, dual_annealing
import logging

# Define common configurations for platelet inventory
CONFIG = {
    'fixed_transaction_cost': 750,  # Cost per transaction
    'variable_transaction_cost': 5,  # Cost per unit transported
    'holding_cost': 5,  # Increased holding cost to penalize overstocking
    'shortage_cost': 100,  # Penalty per unit of unsatisfied demand
    'wastage_cost': 50,  # Increased wastage cost to penalize expiry
    'demand_mean': 30,  # Mean demand per day
    'demand_std': 15,  # Increased standard deviation for realistic variability
    'max_inventory': 300,  # Maximum inventory capacity
    'shelf_life': 5,  # Platelet shelf life in days
    'fixed_lead_time': 1  # Lead time for replenishment
}

# Set up logging
logging.basicConfig(level=logging.INFO)

# Demand generator
class HalfNormalGenerator:
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def next(self):
        return max(0, np.random.normal(self.mean, self.std))

# Supply Chain Environment
class PlateletSupplyChainEnvironment:
    def __init__(self, config):
        self.config = config
        self.shelf_life = config['shelf_life']
        self.demand_generator = HalfNormalGenerator(config['demand_mean'], config['demand_std'])
        self.reset()

    def reset(self):
        self.inventory = np.random.randint(50, 100)
        self.inventory_ages = [0] * self.inventory
        self.orders = []
        self.total_cost = 0
        self.total_demand = 0
        self.total_filled_demand = 0
        self.total_waste = 0
        return self.inventory

    def step(self, order_quantity):
        demand = int(self.demand_generator.next())
        self.total_demand += demand
        filled_demand = min(demand, len(self.inventory_ages))
        self.inventory_ages = self.inventory_ages[filled_demand:]  # Remove fulfilled inventory
        self.total_filled_demand += filled_demand

        # Process wastage
        expired_units = self.inventory_ages.count(self.shelf_life)
        self.total_waste += expired_units
        self.inventory_ages = [age + 1 for age in self.inventory_ages if age < self.shelf_life]

        # Process order
        if order_quantity > 0:
            order_quantity = int(order_quantity)
            self.inventory_ages.extend([0] * order_quantity)
            self.orders.append(order_quantity)
        else:
            self.orders.append(0)

        # Calculate costs
        fixed_cost = self.config['fixed_transaction_cost'] if order_quantity > 0 else 0
        variable_cost = self.config['variable_transaction_cost'] * order_quantity
        holding_cost = self.config['holding_cost'] * len(self.inventory_ages)
        lost_sales_cost = self.config['shortage_cost'] * (demand - filled_demand)
        wastage_cost = self.config['wastage_cost'] * expired_units

        total_cost = fixed_cost + variable_cost + holding_cost + lost_sales_cost + wastage_cost
        self.total_cost += total_cost

        reward = -total_cost
        return len(self.inventory_ages), reward, fixed_cost, variable_cost, holding_cost, lost_sales_cost, wastage_cost

# Train SQ and RS policies
class sQInventoryPolicy:
    def __init__(self, s, Q):
        self.s = s
        self.Q = Q

    # Adjusted logic for SQ policy to correctly evaluate inventory levels
    def order(self, inventory):
        return self.Q if inventory <= self.s else 0

class RSInventoryPolicy:
    def __init__(self, R, S):
        self.R = R
        self.S = S

    def order(self, inventory, timestep):
        return self.S - inventory if timestep % self.R == 0 and inventory <= self.S else 0

# SQ and RS Optimization
def rs_objective(params, training_data, config):
    R, S = params
    env = PlateletSupplyChainEnvironment(config)
    total_cost = 0
    policy = RSInventoryPolicy(R, S)
    for timestep, row in enumerate(training_data.itertuples()):
        state, demand = row.State, row.Demand
        order = policy.order(state, timestep)
        _, reward, *_ = env.step(order)
        total_cost -= reward
    return total_cost

def train_evaluate_rs_policy_optimized(training_data, config):
    bounds = [(1, 10), (10, config['max_inventory'])]
    result = dual_annealing(rs_objective, bounds=bounds, args=(training_data, config))
    return int(result.x[0]), int(result.x[1]), result.fun

def sq_objective(params, training_data, config):
    s, Q = params
    env = PlateletSupplyChainEnvironment(config)
    total_cost = 0
    policy = sQInventoryPolicy(s, Q)
    for _, row in training_data.iterrows():
        state, demand = row['State'], row['Demand']
        order = policy.order(state)
        _, reward, *_ = env.step(order)
        total_cost -= reward
    return total_cost

def train_evaluate_sq_policy_optimized(training_data, config):
    bounds = [(1, config['max_inventory'] // 3), (10, config['max_inventory'])]
    result = differential_evolution(sq_objective, bounds=bounds, args=(training_data, config))
    return int(result.x[0]), int(result.x[1]), result.fun

# Generate data
def generate_data(episodes, timesteps, config):
    env = PlateletSupplyChainEnvironment(config)
    data = []
    for _ in range(episodes):
        state = env.reset()
        for _ in range(timesteps):
            demand = env.demand_generator.next()
            data.append([state, demand])
    return pd.DataFrame(data, columns=['State', 'Demand'])

# Evaluate SQ and RS on Data (Generic Function for Training and Testing)
# Updated function to ensure all arrays in the results dictionary have consistent lengths.
def evaluate_policies_sq_and_rs(data, best_s, best_Q, best_R, best_S, config):
    env = PlateletSupplyChainEnvironment(config)
    results = {
        "demand": [], "sq_filled": [], "rs_filled": [], "lost_demand": [],
        "sq_inventory": [], "rs_inventory": [], "sq_orders": [], "rs_orders": [],
        "sq_costs": [], "rs_costs": [], "sq_waste": [], "rs_waste": [],
        "sq_fixed_cost": [], "rs_fixed_cost": [], "sq_variable_cost": [], "rs_variable_cost": [],
        "sq_holding_cost": [], "rs_holding_cost": [], "sq_wastage_cost": [], "rs_wastage_cost": [],
        "sq_shortage_cost": [], "rs_shortage_cost": []
    }

    sq_policy = sQInventoryPolicy(best_s, best_Q)
    rs_policy = RSInventoryPolicy(best_R, best_S)

    for timestep, row in data.iterrows():
        demand = row['Demand']
        results["demand"].append(demand)

        # SQ Policy
        sq_order = sq_policy.order(env.inventory)
        sq_inventory, sq_reward, sq_fixed, sq_variable, sq_holding, sq_shortage, sq_wastage = env.step(sq_order)
        sq_filled = min(demand, sq_inventory)
        results["sq_filled"].append(sq_filled)
        results["sq_orders"].append(sq_order)
        results["sq_inventory"].append(sq_inventory)
        results["sq_fixed_cost"].append(sq_fixed)
        results["sq_variable_cost"].append(sq_variable)
        results["sq_holding_cost"].append(sq_holding)
        results["sq_shortage_cost"].append(sq_shortage)
        results["sq_wastage_cost"].append(sq_wastage)
        results["sq_costs"].append(sq_fixed + sq_variable + sq_holding + sq_shortage + sq_wastage)

        # RS Policy
        rs_order = rs_policy.order(env.inventory, timestep)
        rs_inventory, rs_reward, rs_fixed, rs_variable, rs_holding, rs_shortage, rs_wastage = env.step(rs_order)
        rs_filled = min(demand, rs_inventory)
        results["rs_filled"].append(rs_filled)
        results["rs_orders"].append(rs_order)
        results["rs_inventory"].append(rs_inventory)
        results["rs_fixed_cost"].append(rs_fixed)
        results["rs_variable_cost"].append(rs_variable)
        results["rs_holding_cost"].append(rs_holding)
        results["rs_shortage_cost"].append(rs_shortage)
        results["rs_wastage_cost"].append(rs_wastage)
        results["rs_costs"].append(rs_fixed + rs_variable + rs_holding + rs_shortage + rs_wastage)

    # Ensure all arrays in the results dictionary have the same length
    max_length = max(len(values) for values in results.values())
    for key, values in results.items():
        if len(values) < max_length:
            results[key].extend([0] * (max_length - len(values)))

    # Fill rate and service cycle level
    results["sq_fill_rate"] = sum(results["sq_filled"]) / sum(results["demand"]) if sum(results["demand"]) > 0 else 0
    results["sq_service_cycle"] = len([f for f in results["sq_filled"] if f > 0]) / len(results["sq_filled"]) if len(results["sq_filled"]) > 0 else 0
    results["sq_waste"] = sum(results["sq_wastage_cost"]) / config["wastage_cost"]

    results["rs_fill_rate"] = sum(results["rs_filled"]) / sum(results["demand"]) if sum(results["demand"]) > 0 else 0
    results["rs_service_cycle"] = len([f for f in results["rs_filled"] if f > 0]) / len(results["rs_filled"]) if len(results["rs_filled"]) > 0 else 0
    results["rs_waste"] = sum(results["rs_wastage_cost"]) / config["wastage_cost"]

    return pd.DataFrame(results), results



In [5]:


# Generate Training and Testing Data
training_data = generate_data(36, 30, CONFIG)
testing_data = generate_data(6, 30, CONFIG)

# Train SQ and RS Policies
best_s, best_Q, best_cost_sq = train_evaluate_sq_policy_optimized(training_data, CONFIG)
best_R, best_S, best_cost_rs = train_evaluate_rs_policy_optimized(training_data, CONFIG)

# Evaluate Policies on Training Data
training_results_df, training_summary = evaluate_policies_sq_and_rs(training_data, best_s, best_Q, best_R, best_S, CONFIG)
training_results_df.to_excel("training_results.xlsx", index=False)

# Evaluate Policies on Testing Data
testing_results_df, testing_summary = evaluate_policies_sq_and_rs(testing_data, best_s, best_Q, best_R, best_S, CONFIG)
testing_results_df.to_excel("test_results.xlsx", index=False)

# Save Data
training_data.to_excel("training_data.xlsx", index=False)
testing_data.to_excel("testing_data.xlsx", index=False)

# Print Summary of Results
print("\n--- Training Data Results ---")
print(f"Optimal s: {best_s}, Optimal Q: {best_Q}")
print(f"Total SQ Policy Cost: {sum(training_results_df['sq_costs'])}")
print(f"SQ Fill Rate: {training_summary['sq_fill_rate']:.2f}")
print(f"SQ Service Cycle Level: {training_summary['sq_service_cycle']:.2f}")
print(f"SQ Wastage: {training_summary['sq_waste']} units")
print(f"Optimal R: {best_R}, Optimal S: {best_S}")
print(f"Total RS Policy Cost: {sum(training_results_df['rs_costs'])}")
print(f"RS Fill Rate: {training_summary['rs_fill_rate']:.2f}")
print(f"RS Service Cycle Level: {training_summary['rs_service_cycle']:.2f}")
print(f"RS Wastage: {training_summary['rs_waste']} units")

print("\n--- Testing Data Results ---")
print(f"Optimal s: {best_s}, Optimal Q: {best_Q}")
print(f"Total SQ Policy Cost: {sum(testing_results_df['sq_costs'])}")
print(f"SQ Fill Rate: {testing_summary['sq_fill_rate']:.2f}")
print(f"SQ Service Cycle Level: {testing_summary['sq_service_cycle']:.2f}")
print(f"SQ Wastage: {testing_summary['sq_waste']} units")
print(f"Optimal R: {best_R}, Optimal S: {best_S}")
print(f"Total RS Policy Cost: {sum(testing_results_df['rs_costs'])}")
print(f"RS Fill Rate: {testing_summary['rs_fill_rate']:.2f}")
print(f"RS Service Cycle Level: {testing_summary['rs_service_cycle']:.2f}")
print(f"RS Wastage: {testing_summary['rs_waste']} units")



--- Training Data Results ---
Optimal s: 98, Optimal Q: 27
Total SQ Policy Cost: 2262385
SQ Fill Rate: 0.91
SQ Service Cycle Level: 0.97
SQ Wastage: 631.0 units
Optimal R: 7, Optimal S: 265
Total RS Policy Cost: 1196925
RS Fill Rate: 0.77
RS Service Cycle Level: 0.85
RS Wastage: 3951.0 units

--- Testing Data Results ---
Optimal s: 98, Optimal Q: 27
Total SQ Policy Cost: 363755
SQ Fill Rate: 0.94
SQ Service Cycle Level: 0.98
SQ Wastage: 87.0 units
Optimal R: 7, Optimal S: 265
Total RS Policy Cost: 194005
RS Fill Rate: 0.81
RS Service Cycle Level: 0.88
RS Wastage: 631.0 units
