In [1]:
import numpy.random as npr
import numpy as np
import random 
import torch
from torch.autograd import Variable
global_seed = 0
random.seed(global_seed)
npr.seed(global_seed)


"""
Please see KnapsackNetsModified.py is KnapsackNets.py with a few small changes to make it run w/o errors.
- Added unsqueezes to make dimensions fit
- commented out a few lines (normalize_jk) that were failing + output wasn't used for anything downstream

In initial runs -- using v37, not v46. Q around what the difference is and which one to use?
"""

from KnapsackNetsModified import SafetyNet
#from KnapsackNets import SafetyNet


  (fname, cnt))
  (fname, cnt))


# Generate Simulation Parameters

From JK thesis!
- https://www.cmu.edu/tepper/programs/phd/program/assets/dissertations/2017-joint-phd-program-algorithms-combinatorics-and-optimization-karp-dissertation.pdf

- Demand for each cluster is modeled as a Poisson process whose rate parameter is drawn from a Normal distribution
with mean 10 and standard deviation 5. 
- Each day of the simulation allows the Poisson processes generating demand to run for either 40 or 400 time units (depending on simulation condition). 
- Inventory distributions for each category are modeled as Poisson distributions with rate parameters drawn from a Uniform distribution from 1 to 20. 
- The price of items in each category is drawn from a Normal distribution with mean 800 and standard deviation 400. 
- Each order is assigned a cancel probability based on a logistic function of the form 1 − 1/1+ea+b·I , where I is the item’s stated inventory level and a and b are parameters of the item’s category. a and b are drawn for each category from Normal distribution with mean .5 and −.2 and standard deviations .1 and either .05 or .15 (depending on simulation condition), respectively. 
- The inventory distribution parameters, cancel parameters, demand rates, and prices are all truncated at 0 to prevent negative values.
- We vary data truncation by setting our benchmark Retail-1-threshold policies to single thresholds
of either 8 or 14, allowing approximately 1/3 and 2/3 of total demand to get truncated,
respectively. 
- Volume of demand is varied by allowing either 40 or 400 time units
for the demand generating Poisson process during each period of the simulation.
- We control cancel variability by running simulations where the standard deviation
of cancel parameter b is .05 as well as .15. 
- We use the corresponding Retail-1-threshold policy as the starting threshold for the Onera policies and then
let the Onera policies train on the data it collects and determine its own thresholds
for the remaining days. 


In [2]:
from SimulationParams import SafetyNetSimulation 

"""
Please see SimulationParams.py for this implementation...!
"""
safety_net_simulation = SafetyNetSimulation(
                 is_high_demand=False, 
                 is_high_truncation=False, 
                 is_high_variable_cancel_rate=False)
            
        

# Initialize SafetyNet

In [3]:
def init_safety_net(safety_net_simulation):
    starting_retail_policy_1_threshold = torch.zeros(safety_net_simulation.nCategories, 
                                       safety_net_simulation.nThresholds)
    starting_retail_policy_1_threshold[:,safety_net_simulation.retail_1_threshold_policy] = 1

    safety_net = SafetyNet(
        nKnapsackCategories=safety_net_simulation.nCategories, 
        nThresholds=safety_net_simulation.nThresholds, 
        starting_thresholds=starting_retail_policy_1_threshold, 
        # all possible thresholds? Or is this retail policy 1?
        parametric_knapsack=False # First Pass, turn off LP layer
        # Q: What should we be doing with the remaining variables here?
    )
    return safety_net

# Initialize Opt/Est learning rates

In [4]:
def init_opt_est_params(safety_net):
    # JK provided learning rates
    safety_net_optimizer_est = torch.optim.SGD([
    {'params':[safety_net.prices_est]}, #what will happen here since there is no learning rate? will it always stay the same?
    {'params':[safety_net.inventory_lam_est], 'lr': 1e-0},
    {'params':[safety_net.demand_distribution_est], 'lr': 1e-2},
    {'params':[safety_net.cancel_coef_est], 'lr': 5e-3},
    {'params':[safety_net.cancel_intercept_est], 'lr': 1e-1}
        ], lr=1e-7)

    safety_net_optimizer_opt = torch.optim.SGD([
    {'params':[safety_net.inventory_lam_opt], 'lr': 2e-5},
    {'params':[safety_net.demand_distribution_opt], 'lr': 1e-5},
    {'params':[safety_net.cancel_coef_opt], 'lr': 2e-7},
    {'params':[safety_net.cancel_intercept_opt], 'lr': 2e-6}
        ], lr=1e-8)

    safety_net_optimizer_RHS =torch.optim.SGD([
    {'params':[safety_net.cancel_rate_param, safety_net.accept_rate_param], 'lr': 1e-6}
        ], lr=1e-8)
    
    return safety_net_optimizer_est, safety_net_optimizer_opt, safety_net_optimizer_RHS

# Train

In [7]:
num_periods = 7 # or epochs
nBatches = 32

safety_net = init_safety_net(safety_net_simulation)
safety_net_optimizer_est, safety_net_optimizer_opt, safety_net_optimizer_RHS = init_opt_est_params(safety_net)

for i in range(num_periods):
    print("Order Period: " + str(i))
    # Generate orders for period
    orders, inv_count, price, cancel_probs, collection_thresholds = safety_net_simulation.generate_data()
    batch_size = int(orders.shape[0]/nBatches)
    
    if i == 0:
        safety_net.parametric_knapsack = False
    else:
        safety_net.parametric_knapsack = True
        
    head = 0
    tail = batch_size
    n_orders = orders.shape[0]
    batch_num = 0
    while tail < orders.shape[0]:
        
        ## Grab the next batch of order and corresponding inv, price, cancel, thresh info
        tail = np.min([n_orders-1, head+batch_size])
        if (n_orders - 1 - tail) < batch_size:
            tail = n_orders
        batch_orders = orders[head:tail,:]
        batch_inv_count = inv_count[head:tail,:]
        batch_price = price[head:tail]
        batch_cancel_probs = cancel_probs[head:tail]
        batch_collection_thresholds = collection_thresholds[head:tail,:]
        
        head = tail

        # Forward Pass
        (new_revenue_loss, #renamed from new_objective_loss
         new_cancel_constraint_loss, 
         new_accept_constraint_loss, 
         arrival_probability_batch_by_threshold, 
         log_arrival_prob, 
         log_cancel_prob, 
         log_category_prob, 
         estimated_batch_total_demand, 
         observed_cancel_constraint_loss, 
         observed_accept_constraint_loss, 
         lp_infeasible) = safety_net.forward(
            category=Variable(torch.from_numpy(batch_orders.astype(np.float32))),
            inv_count=torch.from_numpy(batch_inv_count.astype(np.float32)),
            price=torch.from_numpy(batch_price.astype(np.float32)),
            cancel=torch.from_numpy(batch_cancel_probs.astype(np.float32)),
            collection_thresholds=torch.from_numpy(batch_collection_thresholds.astype(np.float32))
        )
        
        # "logging"
        if batch_num % 5 == 0:
            print("Batch: ", batch_num)
            print(
             "New Revenue Loss", new_revenue_loss, #renamed from new_objective_loss
             "New Cancel Constraint Loss", new_cancel_constraint_loss, 
             "New Accept Constraint Loss", new_accept_constraint_loss)
            print( 
             "Obs Cancel Constraint Loss", observed_cancel_constraint_loss, 
             "Obs Accept Constraint Loss", observed_accept_constraint_loss, 
             "LP Infeasible", lp_infeasible)

        # Backward Pass
        
        # zero grad
        safety_net_optimizer_est.zero_grad()
        safety_net_optimizer_opt.zero_grad()
        safety_net_optimizer_RHS.zero_grad()

        is_cancel_constraint_violated = new_cancel_constraint_loss < 0 # TODO: How to calculate is_cancel_constraint_violated?
        is_other_constraints_violated = new_accept_constraint_loss > 0 # TODO: How to calculate is_other_constraints_violated?

        # update parameters based on violation logic
        # TODO: If both constraints are violated, do we run backward for both cancel and accept?
        if is_cancel_constraint_violated: 
            new_cancel_constraint_loss.backward(retain_graph=True)
            safety_net_optimizer_RHS.step()
        if is_other_constraints_violated: 
            new_accept_constraint_loss.backward()
            safety_net_optimizer_RHS.step()
        if not is_cancel_constraint_violated and not is_other_constraints_violated:
            #print(safety_net.inventory_lam_est)
            #print(safety_net.demand_distribution_est)
            #print(safety_net.cancel_coef_est)
            #print(safety_net.cancel_intercept_est)

            new_revenue_loss.backward()
            if i == 0:
                #IF first pass, then we only step for estimation parameters AND
                #manually copy values of temp_optimizer_est into temp_optimizer_opt params
                safety_net_optimizer_est.step()
                # Manually copy values of est params into opt params
                safety_net.inventory_lam_opt = safety_net.inventory_lam_est
                safety_net.demand_distribution_opt = safety_net.demand_distribution_est
                safety_net.cancel_coef_opt = safety_net.cancel_coef_est
                safety_net.cancel_intercept_opt = safety_net.cancel_intercept_est
            else:    
                safety_net_optimizer_est.step()
                safety_net_optimizer_opt.step()
        
        batch_num +=1
    

Order Period: 0
Batch:  0
New Revenue Loss tensor(-5.2655) New Cancel Constraint Loss tensor(24.1175) New Accept Constraint Loss tensor(-7.3546)
Obs Cancel Constraint Loss tensor(24.1175) Obs Accept Constraint Loss tensor(-7.3546) LP Infeasible 0
Batch:  5
New Revenue Loss tensor(-4.9721) New Cancel Constraint Loss tensor(27.0034) New Accept Constraint Loss tensor(-6.4566)
Obs Cancel Constraint Loss tensor(27.0034) Obs Accept Constraint Loss tensor(-6.4566) LP Infeasible 0
Batch:  10
New Revenue Loss tensor(-5.0297) New Cancel Constraint Loss tensor(23.3906) New Accept Constraint Loss tensor(-6.8585)
Obs Cancel Constraint Loss tensor(23.3906) Obs Accept Constraint Loss tensor(-6.8585) LP Infeasible 0
Batch:  15
New Revenue Loss tensor(-5.0831) New Cancel Constraint Loss tensor(22.9478) New Accept Constraint Loss tensor(-7.2646)
Obs Cancel Constraint Loss tensor(22.9478) Obs Accept Constraint Loss tensor(-7.2646) LP Infeasible 0
Batch:  20
New Revenue Loss tensor(-5.9467) New Cancel Con

RuntimeError: value cannot be converted to type float without overflow: 10000000000000000159028911097599180468360808563945281389781327557747838772170381060813469985856815104.000000