## Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import ParameterGrid
# Global seaborn styling applied to the figures drawn later in this notebook:
# grid style, default colour palette and element scaling context.
sns.set_style("darkgrid")
sns.set_palette("rainbow")
sns.set_context("talk")

In [2]:
# Put the project's Baseline directory at the front of the module search path.
# NOTE(review): presumably this is where SQPolicy, Environment and utils live —
# confirm. The path is absolute and machine-specific, so this cell only works
# on a workstation with the same directory layout.
import sys  
sys.path.insert(0, r'C:\Users\kishore.kukreja\Desktop\Inventory Policy Two Echelon\SCM-RL\SCM\Baseline')

In [3]:
from SQPolicy import SQPolicy, simulate, simulate_episode
from Environment import SupplyChainEnvironment
from utils import visualize_transitions

In [4]:
from ax import optimize

## Load and Prepare Demand Data

In [None]:
# Load the historical product demand CSV into a DataFrame.
# NOTE(review): absolute, machine-specific path.
raw_demand_csv = r"C:\Users\kishore.kukreja\Desktop\Inventory Policy Two Echelon\SCM-RL\data\HistoricalProductDemand.csv"
df = pd.read_csv(raw_demand_csv)

In [None]:
# Parse the order dates into datetime values.
df["Date"] = pd.to_datetime(df["Date"])
# Missing demand counts as 0; entries that cannot be parsed as numbers become
# NaN. One vectorized to_numeric call over the whole column replaces a
# per-element apply(), which is much slower on large frames.
df["Order_Demand"] = pd.to_numeric(df["Order_Demand"].fillna(0), errors="coerce")
# Drop every row that still has a missing value in any column (this includes
# the demand entries that could not be parsed above).
df = df.dropna()

In [None]:
# Sanity check: list any rows that still contain a missing value.
is_NaN = df.isna()
row_has_NaN = is_NaN.any(axis="columns")
rows_with_NaN = df.loc[row_has_NaN]
rows_with_NaN

In [None]:
# Order the rows by product code, then keep the first occurrence of each code.
df = df.sort_values("Product_Code")
product_codes = df["Product_Code"].drop_duplicates(keep="first")

In [None]:
# Number of rows per product code, as a plain {product_code: row_count} dict.
counts = df.groupby("Product_Code").size().to_dict()

In [None]:
# Re-order the mapping so the product codes with the most rows come first.
counts = {
    code: n_rows
    for code, n_rows in sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
}

In [None]:
# Show the first three entries of `counts` (fewer if it holds fewer).
for k, v in list(counts.items())[:3]:
    print(f"{k}: {v}")

In [None]:
# Keep only the rows of the two products under study and drop the columns
# that are not used in the remaining cells.
unused_columns = ["Product_Code", "Warehouse", "Product_Category"]
demand_1359 = df.loc[df["Product_Code"] == 'Product_1359'].drop(columns=unused_columns)
demand_1295 = df.loc[df["Product_Code"] == 'Product_1295'].drop(columns=unused_columns)

In [None]:
# Put each product's rows in chronological order.
demand_1359 = demand_1359.sort_values("Date")
demand_1295 = demand_1295.sort_values("Date")

In [None]:
# Attach, to every row, the summed Order_Demand of all rows sharing its Date.
for product_demand in (demand_1359, demand_1295):
    product_demand["Total_Demand"] = (
        product_demand.groupby("Date")["Order_Demand"].transform("sum")
    )

In [None]:
# Reduce each product's frame to one row per Date, drop the per-order demand
# column (Total_Demand already carries the per-date sum) and index by Date.
demand_1359 = (
    demand_1359
    .drop_duplicates(subset=["Date"])
    .drop(columns="Order_Demand")
    .set_index("Date")
)

demand_1295 = (
    demand_1295
    .drop_duplicates(subset=["Date"])
    .drop(columns="Order_Demand")
    .set_index("Date")
)

In [None]:
# Display the prepared demand frame for Product_1359 for a visual check.
demand_1359

In [None]:
# Combine both products on "Date" (merge's default inner join keeps only the
# dates present in both frames), then replace the automatic _x/_y suffixes
# with per-product column names.
merged_column_names = {
    'Total_Demand_x': 'demand_1359',
    'Total_Demand_y': 'demand_1295',
}
final = pd.merge(demand_1359, demand_1295, on="Date").rename(columns=merged_column_names)

In [None]:
# plt.subplots() creates the figure itself; calling plt.figure() beforehand
# would only leave a second, empty figure in the cell output.
fig, ax = plt.subplots(1, 1, figsize=(16, 7))

ax.plot(final["demand_1359"], label="Product_1359")
ax.plot(final["demand_1295"], label="Product_1295")

# Label the figure so it can be read on its own.
ax.set_title("Total demand per date")
ax.set_xlabel("Date")
ax.set_ylabel("Total demand")
ax.legend()

In [None]:
# Write the per-product frames and the merged frame to CSV.
# NOTE(review): absolute, machine-specific output paths.
for frame, csv_path in (
    (demand_1359, r"C:\Users\kishore.kukreja\Desktop\Inventory Policy Two Echelon\SCM-RL\data\demand_1359.csv"),
    (demand_1295, r"C:\Users\kishore.kukreja\Desktop\Inventory Policy Two Echelon\SCM-RL\data\demand_1295.csv"),
    (final, r"C:\Users\kishore.kukreja\Desktop\Inventory Policy Two Echelon\SCM-RL\data\demand.csv"),
):
    frame.to_csv(csv_path)

## Policy Evaluation and Optimization

In [5]:
def eval_func(p: dict, log=False):
    """Score one policy configuration by its mean simulated return.

    Args:
        p: Mapping with the keys ``factory_s``, ``factory_Q``, ``w1_s``,
            ``w2_s``, ``w1_Q`` and ``w2_Q``.
        log: Pass the literal ``True`` to forward ``log=True`` to
            ``simulate``. Any other value leaves the ``simulate`` call
            without a ``log`` argument.

    Returns:
        ``np.mean`` of whatever ``simulate`` returns for 50 episodes.
    """
    policy = SQPolicy(
        factory_safety_stock=p['factory_s'],
        factory_reorder_amount=p['factory_Q'],
        safety_stock=(p['w1_s'], p['w2_s']),
        reorder_amount=(p['w1_Q'], p['w2_Q']))

    # Honour `log` instead of silently ignoring it. Only the literal True is
    # forwarded, so the default call stays simulate(policy, num_episodes=50).
    # NOTE(review): Ax appears to call two-parameter evaluation functions as
    # f(parameterization, weight); the strict `is True` check keeps such a
    # weight from switching logging on — confirm against the Ax version in use.
    # For the same reason the signature must keep exactly two parameters.
    sim_kwargs = {"log": True} if log is True else {}

    return np.mean(simulate(policy, num_episodes=50, **sim_kwargs))

In [6]:
# Search space for the optimizer: one float "range" parameter per policy knob.
# The *_s parameters span [0, 10]; the *_Q parameters span [5, 10].
parameters = tuple(
    {
        "name": name,
        "type": "range",
        "bounds": [lower, upper],
        "value_type": "float",
    }
    for name, lower, upper in (
        ("factory_s", 0.0, 10.0),
        ("factory_Q", 5.0, 10.0),
        ("w1_s", 0.0, 10.0),
        ("w1_Q", 5.0, 10.0),
        ("w2_s", 0.0, 10.0),
        ("w2_Q", 5.0, 10.0),
    )
)

In [7]:
# NOTE(review): presumably the reward observed in an earlier run with this
# strategy (run settings not recorded here):
# mean 6660.313520818671, standard deviation 489.4557712110093
def bayesian_optimization(total_trials, parameters):
    """Search the policy parameters with ``optimize``, maximizing ``eval_func``.

    Args:
        total_trials: Forwarded to ``optimize`` as ``total_trials``.
        parameters: Forwarded to ``optimize`` as ``parameters``.

    Returns:
        The first two of the four items returned by ``optimize``:
        ``(best_parameters, best_values)``.
    """
    search_settings = dict(
        parameters=parameters,
        evaluation_function=eval_func,
        minimize=False,  # higher scores are better
        total_trials=total_trials,
    )
    # The last two returned items (experiment, model) are not needed here.
    incumbent, incumbent_values, _experiment, _model = optimize(**search_settings)
    return incumbent, incumbent_values

In [8]:
# NOTE(review): presumably the reward observed in an earlier run with this
# strategy (run settings not recorded here):
# mean 5347.2, standard deviation 613.7202620086778
def grid_search(p_grid1=[0, 5, 10],
                p_grid2=[0, 5, 10]):
    """Evaluate every combination of grid values and keep the best one.

    Args:
        p_grid1: Candidate values for ``factory_s`` and ``factory_Q``.
        p_grid2: Candidate values for ``w1_s``, ``w2_s``, ``w1_Q`` and
            ``w2_Q``.

    Returns:
        ``(best_params, best_return)``: the configuration with the highest
        ``eval_func`` score and that score. If no configuration scores above
        ``-inf`` (empty grid, or every score is NaN) the result is
        ``(None, float('-inf'))``.
    """
    # The default lists are only read, never mutated, so sharing them
    # across calls is safe.
    param_grid = {
        'factory_s': p_grid1,
        'factory_Q': p_grid1,
        'w1_s': p_grid2,
        'w2_s': p_grid2,
        'w1_Q': p_grid2,
        'w2_Q': p_grid2,
    }

    # Initialise both results up front: without this, the final return
    # raises UnboundLocalError whenever no configuration beats -inf.
    best_params = None
    best_return = float('-inf')
    for i, p in enumerate(ParameterGrid(param_grid)):
        m_return = eval_func(p)
        if m_return > best_return:
            best_return = m_return
            best_params = p

        # Progress report every 100 configurations.
        if i % 100 == 0:
            print(f"Configuration #{i} -- {best_return}")

    return best_params, best_return

In [9]:
def main(total_trials=1, num_episodes=1, optimization_strategy="BayesianOptimization"):
    """Optimize the policy parameters, then simulate and plot the best policy.

    Args:
        total_trials: Passed to ``bayesian_optimization``; not used by the
            grid-search branch.
        num_episodes: Passed to ``simulate`` for the final run of the best
            policy.
        optimization_strategy: ``"BayesianOptimization"`` or ``"GridSearch"``.
            Any other value still falls back to grid search, but now emits a
            warning so that a mistyped name is noticed.

    Returns:
        None. The best parameters/values and the reward statistics are
        printed and the return trace is plotted.
    """
    import warnings

    if optimization_strategy == "BayesianOptimization":
        # `parameters` is the module-level search-space definition.
        best_params, best_values = bayesian_optimization(
            total_trials, parameters)
    else:
        if optimization_strategy != "GridSearch":
            warnings.warn(
                f"Unknown optimization_strategy {optimization_strategy!r}; "
                "falling back to grid search.",
                stacklevel=2,
            )
        best_params, best_values = grid_search()

    print(f"Best Parameters: \n{best_params}")
    print(f"Best Values: \n{best_values}")

    factory_safety_stock = best_params["factory_s"]
    factory_reorder_amount = best_params["factory_Q"]
    safety_stock = (best_params["w1_s"], best_params["w2_s"])
    reorder_amount = (best_params["w1_Q"], best_params["w2_Q"])

    sq_policy = SQPolicy(
        factory_safety_stock,
        factory_reorder_amount,
        safety_stock,
        reorder_amount
    )

    return_trace = simulate(sq_policy, num_episodes=num_episodes, log=True)

    plt.figure(figsize=(16, 4))
    plt.plot(range(len(return_trace)), return_trace)
    # Label the figure so it can be read without the surrounding code.
    plt.xlabel("Episode")
    plt.ylabel("Return")
    plt.title("Return trace of the best policy")
    print(
        f"Reward: mean {np.mean(return_trace)}, standard deviation {np.std(return_trace)}")

    plt.show()

    # Optional single-episode inspection, currently disabled:
    # transitions_sQ = simulate_episode(sq_policy, log=True)
    # visualize_transitions(np.array(transitions_sQ), T=50)

In [10]:
# Run configuration handed to main().
total_trials, num_episodes = 5, 5
# Supported strategies: "GridSearch", "BayesianOptimization"
optimization_strategy = "GridSearch"

In [None]:
# Pass the configured strategy rather than a hard-coded literal, so that
# changing `optimization_strategy` actually changes which optimizer runs.
main(total_trials=total_trials, num_episodes=num_episodes, optimization_strategy=optimization_strategy)

Configuration #0 -- -205990.0
Configuration #100 -- -204312.0
Configuration #200 -- -202432.0
Configuration #300 -- -13341.34
