In [1]:
import os, sys, datetime, numpy as np, matplotlib.pyplot as plt
from tqdm.auto import tqdm

# When launched from inside the "notebooks" folder, move one level up so the
# relative package imports below resolve from the parent directory.
if os.path.split(os.getcwd())[1] == "notebooks":
    os.chdir(os.pardir)
print(f"Current path: {os.getcwd()}")

# Make the current directory importable.
sys.path.append('./')

from environments.complementary import ComplementaryPricingEnvironment
from agents.pricing.catalog import CatalogPricingAgent

Current path: /Users/marcomussi/git/PricingComplementaryProducts


In [2]:
# --- Experiment configuration -------------------------------------------------
horizon = 10000       # number of rounds played in each trial
n_products = 5
n_actions = 5         # number of candidate margins per product
n_baskets = 100000    # forwarded to the environment constructor
products_probs = np.array([0.5, 0.7, 0.7, 0.6, 0.8])
alpha = 1.0 # 1 revenue, 0 profit
mc_ep = 10            # forwarded to the environment constructor
num_trials = 5        # independent repetitions (one environment seed each)
kernel_L = 1000       # forwarded to the agent constructor
dateformat = "(%Y-%b-%d %Ih%Mm%p)"  # timestamp pattern used in the saved figure name

# --- Margin grid ---------------------------------------------------------------
# The same evenly spaced margins are offered for every product.
margins = np.linspace(0.1, 0.9, n_actions)
margins_to_idx = {margin: idx for idx, margin in enumerate(margins)}
margins = margins[np.newaxis, :]                      # shape (1, n_actions)
margins_env = np.repeat(margins, n_products, axis=0)  # shape (n_products, n_actions)

# --- Demand tables ---------------------------------------------------------------
# Baseline table: one row per product, one column per margin.
demands = np.array([[0.7, 0.7, 0.3, 0.2, 0.1],
                    [0.7, 0.6, 0.6, 0.4, 0.1],
                    [0.7, 0.6, 0.6, 0.5, 0.3],
                    [0.7, 0.7, 0.5, 0.2, 0.1],
                    [0.7, 0.6, 0.6, 0.4, 0.1]])

# Two layers along the last axis, both initialised to the baseline table;
# layer 1 is then overwritten with the enhanced values for items 1, 2 and 4.
demands_compl = np.zeros((n_products, n_actions, 2))
demands_compl[...] = demands[:, :, np.newaxis]
enhanced_demands = {
    1: [0.95, 0.9, 0.7, 0.6, 0.4],   # enhanced demand for item 1
    2: [0.95, 0.9, 0.7, 0.6, 0.4],   # enhanced demand for item 2
    4: [0.99, 0.99, 0.7, 0.6, 0.4],  # enhanced demand for item 4
}
for item_idx, boosted in enhanced_demands.items():
    demands_compl[item_idx, :, 1] = np.array(boosted)

In [3]:
# Two leader -> followers structures are compared on the same demand tables.
graph_dict_indep = {0: [], 1: [], 2: [], 3: [], 4: []}  # product 0, 1, 2, 3, 4 are all leaders/independent
graph_dict_compl = {0: [1, 2], 3: [4]}  # product 0 is leader for products 1 and 2, product 3 is leader for product 4

graph_dict_lst = [graph_dict_indep, graph_dict_compl]

# Build one environment per graph and show the action values it exposes for
# each leader. `env` keeps the environment built for the last graph in the list.
for graph in graph_dict_lst:
    print(graph)
    env = ComplementaryPricingEnvironment(
        n_products, n_actions, margins_env, demands_compl,
        n_baskets, products_probs, alpha, graph,
        mc_ep=mc_ep, seed=0,
    )
    for leader in env.leaders_lst:
        print(f"Optimal values for leader {leader}:")
        print(np.round(env.action_values[leader], 3))
    print("\n")

{0: [], 1: [], 2: [], 3: [], 4: []}
Optimal values for leader 0:
[0.384 0.455 0.224 0.17  0.095]
Optimal values for leader 1:
[0.54  0.545 0.63  0.478 0.133]
Optimal values for leader 2:
[0.539 0.546 0.63  0.596 0.399]
Optimal values for leader 3:
[0.462 0.546 0.45  0.205 0.114]
Optimal values for leader 4:
[0.616 0.624 0.72  0.542 0.151]


{0: [1, 2], 3: [4]}
Optimal values for leader 0:
[[1.732 1.86  1.793 1.705 1.289]
 [1.803 1.932 1.861 1.776 1.361]
 [1.418 1.479 1.548 1.403 0.917]
 [1.325 1.372 1.472 1.312 0.809]
 [1.211 1.241 1.376 1.203 0.682]]
Optimal values for leader 3:
[[1.256 1.369 1.265 1.196 0.934]
 [1.342 1.454 1.349 1.282 1.017]
 [1.194 1.276 1.23  1.129 0.828]
 [0.871 0.91  0.948 0.802 0.447]
 [0.756 0.778 0.847 0.684 0.312]]




In [4]:
# Simulator sanity check: play the first margin of every product once and
# compare the empirical impression / purchase rates against the values stored
# on the environment. Uses whatever `env` is currently bound in the kernel.
act = margins_env[:, 0]

print(act)

sales_impr_mx = env.step(act)

for prod_i in range(n_products):
    # Layer 0 is read as sales and layer 1 as impressions (per the names and
    # messages used below); both are indexed per basket along axis 1.
    sold = sales_impr_mx[prod_i, :, 0]
    shown = sales_impr_mx[prod_i, :, 1]

    # A product can never be sold in a basket where it was not shown.
    assert np.sum(sold[shown == 0]) == 0, "sales presents error"

    action_idx = env.margins_to_idx_lst[prod_i][act[prod_i]]
    is_leader = prod_i in env.leaders_lst

    print(f"\nProd {prod_i} is {'leader' if is_leader else 'follower'}")
    print(f"Impression probability {np.sum(shown) / env.n_baskets} (real: {env.products_probs[prod_i]})")

    # Purchase rate over all impressions of this product.
    overall_rate = np.sum(sold) / np.sum(shown)

    if is_leader:
        print(f"Demand {np.round(overall_rate, 3)} (real: {env.demands[prod_i, action_idx, 0]})")
        continue

    # Followers: split baskets by whether the associated leader was sold.
    lead_sold = sales_impr_mx[env.follower_to_leader_dict[prod_i], :, 0] == 1
    rate_with_lead = np.sum(sold[lead_sold]) / np.sum(shown[lead_sold])
    rate_without_lead = np.sum(sold[~lead_sold]) / np.sum(shown[~lead_sold])

    # NOTE(review): the "samples" figures count baskets in each mask, while the
    # rates above are normalised by the follower's impressions in those baskets.
    print(
        f"Demand: overall {np.round(overall_rate, 3)}"
        f" - with leader {np.round(rate_with_lead, 3)}"
        f" (computed with {np.sum(lead_sold)} samples - real: {env.demands[prod_i, action_idx, 1]})"
        f" - without leader {np.round(rate_without_lead, 3)}"
        f" (computed with {np.sum(np.logical_not(lead_sold))} samples - real: {env.demands[prod_i, action_idx, 0]})"
    )

[0.1 0.1 0.1 0.1 0.1]

Prod 0 is leader
Impression probability 0.50055 (real: 0.5)
Demand 0.698 (real: 0.7)

Prod 1 is follower
Impression probability 0.69739 (real: 0.7)
Demand: overall 0.874 - with leader 0.949 (computed with 34916 samples - real: 0.95) - without leader 0.833 (computed with 65084 samples - real: 0.7)

Prod 2 is follower
Impression probability 0.70344 (real: 0.7)
Demand: overall 0.874 - with leader 0.949 (computed with 34916 samples - real: 0.95) - without leader 0.835 (computed with 65084 samples - real: 0.7)

Prod 3 is leader
Impression probability 0.60246 (real: 0.6)
Demand 0.698 (real: 0.7)

Prod 4 is follower
Impression probability 0.79955 (real: 0.8)
Demand: overall 0.903 - with leader 0.991 (computed with 42046 samples - real: 0.99) - without leader 0.839 (computed with 57954 samples - real: 0.7)


In [None]:
# Show both layers of the demand table held by the current environment.
for layer in (0, 1):
    print(env.demands[:, :, layer])

# Rebuild the demand tables from scratch (same values as in the configuration
# cell) so the experiment below starts from freshly allocated arrays.
demands = np.array([[0.7, 0.7, 0.3, 0.2, 0.1],
                    [0.7, 0.6, 0.6, 0.4, 0.1],
                    [0.7, 0.6, 0.6, 0.5, 0.3],
                    [0.7, 0.7, 0.5, 0.2, 0.1],
                    [0.7, 0.6, 0.6, 0.4, 0.1]])

# Both layers start from the baseline; layer 1 is overwritten for items 1, 2, 4.
demands_compl = np.zeros((n_products, n_actions, 2))
demands_compl[...] = demands[:, :, np.newaxis]
enhanced_demands = {
    1: [0.95, 0.9, 0.7, 0.6, 0.4],   # enhanced demand for item 1
    2: [0.95, 0.9, 0.7, 0.6, 0.4],   # enhanced demand for item 2
    4: [0.99, 0.99, 0.7, 0.6, 0.4],  # enhanced demand for item 4
}
for item_idx, boosted in enhanced_demands.items():
    demands_compl[item_idx, :, 1] = np.array(boosted)

In [None]:
# Regret experiment: for each trial, a fresh environment (seeded by the trial
# index) and a fresh agent interact for `horizon` rounds.
graph_dict = {0: [1, 2]} # product 0 is leader of products 1 and 2

results_lst = []

for trial_i in range(num_trials):

    env = ComplementaryPricingEnvironment(
        n_products, n_actions, margins_env, demands_compl,
        n_baskets, products_probs, alpha, graph_dict,
        mc_ep=mc_ep, seed=trial_i,
    )

    # The return value is not used below; the call is kept in place in case
    # later environment queries rely on it — TODO confirm.
    action_vals = env.compute_values()

    # Per-trial log: instantaneous pseudo-regret and the action played each round.
    trial_log = {
        "pseudo_regret": np.zeros(horizon),
        "actions": np.zeros((horizon, n_products)),
    }
    results_lst.append(trial_log)

    agent = CatalogPricingAgent(
        n_products, n_actions, margins_env, alpha,
        kernel_L, horizon, graph_dict=graph_dict,
    )

    # Value of the best action, used as the regret reference.
    opt = env.compute_best_action_value()

    for t in tqdm(range(horizon)):

        # Store the pulled action first, then always read it back from the log
        # so the environment sees exactly the logged (float) row.
        trial_log["actions"][t, :] = agent.pull()
        played = trial_log["actions"][t, :].ravel()

        res_mx = env.step(played)

        # Layer 0 and layer 1 of the step result are passed separately.
        agent.update(res_mx[:, :, 0], res_mx[:, :, 1])

        trial_log["pseudo_regret"][t] = opt - env.compute_action_value(played)
    


In [None]:
# Aggregate the per-trial pseudo-regret, plot mean cumulative regret with a
# confidence band, and save the figure under a timestamped name.

# One row per trial, one column per round.
inst_regret_matrix = np.zeros((num_trials, horizon))
for i in range(num_trials):
    inst_regret_matrix[i, :] = np.array(results_lst[i]["pseudo_regret"])

cum_regret_matrix = np.cumsum(inst_regret_matrix, axis=1)
results_mean = np.mean(cum_regret_matrix, axis=0)
# Half-width of the band: 1.96 * standard error across trials.
# NOTE(review): np.std defaults to ddof=0; with few trials a sample std
# (ddof=1) would give a slightly wider band — confirm which one is intended.
results_std = 1.96 * np.std(cum_regret_matrix, axis=0) / np.sqrt(num_trials)

x_plt = np.linspace(0, horizon-1, horizon, dtype=int)

fig, ax = plt.subplots()
# Raw string: "\p" is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning; the rendered label is unchanged.
ax.plot(x_plt, results_mean[x_plt], label=r"Regret Mean $\pm$ 95% C.I.")
ax.fill_between(x_plt, results_mean[x_plt] - results_std[x_plt],
                results_mean[x_plt] + results_std[x_plt], alpha=0.3)
ax.set_xlabel("Rounds")
ax.set_ylabel("Cumulative Regret")
ax.legend()

# Create the output folder if needed (no separate existence check) and save.
result_folder = "results"
os.makedirs(result_folder, exist_ok=True)
save_path = os.path.join(
    result_folder,
    "compl_run_" + datetime.datetime.now().strftime(dateformat) + ".jpg",
)
fig.savefig(save_path)