In [None]:
import os, sys, datetime, time, json, numpy as np, matplotlib.pyplot as plt
from tqdm.auto import tqdm

# When the kernel is started from inside a folder named "notebooks", move one
# level up so that the relative paths used below are resolved from the parent.
launch_dir_name = os.path.basename(os.getcwd())
if launch_dir_name == "notebooks":
    os.chdir(os.pardir)
print(f"Current path: {os.getcwd()}")
# Put the working directory on the import path; presumably this is what lets the
# project-local packages imported right after this be found — confirm.
sys.path.append('./')

from environments.complementary import ComplementaryPricingEnvironment
from agents.pricing.catalog import CatalogPricingAgent
from utils.plot import make_plot

In [None]:
# Environment identifiers: used to select the environment graph and embedded in
# the names of the saved figures.
INDEP_ENV, COMPL_ENV = "IndependentEnv", "ComplementaryEnv"

# Algorithm labels: handed to make_plot and used as legend labels. Each value is
# a backslash followed by a name (presumably LaTeX macros resolved when the
# exported .tex figures are compiled — confirm).
INDEP_ALG = r"\indepalg"
COMPL_KNOWNGRAPH_ALG = r"\algknowngraph"
COMPL_UNKNOWNGRAPH_ALG = r"\algunknowngraph"

# Keys of the per-trial result dictionaries.
REGRET, REWARD = "Regret", "Reward"
ACTIONS, OPTIMUM = "Actions", "Optimum"

In [None]:
# ---- Scalar settings -------------------------------------------------------
n_actions = 5      # number of points in the margin grid built below
alpha = 1.0        # forwarded unchanged to both the environment and the agent
mc_ep = 1000       # forwarded to the environment as `mc_ep`
kernel_L = 1000    # forwarded to the agent (positional argument after `alpha`)
num_trials = 10    # independent repetitions per configuration
dateformat = "(%Y-%b-%d %Ih%Mm%p)"  # strftime pattern appended to output file names

# ---- Sweep definition ------------------------------------------------------
n_products_lst = [5, 10, 20]
n_baskets_lst = [100]
horizon_lst = [20, 50, 100, 200, 300]
env_to_test = [INDEP_ENV, COMPL_ENV]
algs_to_test = [INDEP_ALG, COMPL_KNOWNGRAPH_ALG, COMPL_UNKNOWNGRAPH_ALG]

max_prods = max(n_products_lst)

# ---- Margin grid -----------------------------------------------------------
# `n_actions` evenly spaced values in [0.1, 0.9], a value -> position lookup,
# and the same grid repeated on one row per product (sized for the largest
# product count; later cells slice the first `n_products` rows).
margin_grid = np.linspace(0.1, 0.9, n_actions)
margins_to_idx = {value: position for position, value in enumerate(margin_grid)}
margins = margin_grid.reshape(1, n_actions)
margins_env = np.repeat(margins, max_prods, axis=0)

# ---- Input data ------------------------------------------------------------
data_folder = "input_data/"
demands_compl = np.load(f"{data_folder}demands.npy")

# NOTE(review): json.load returns str keys for every JSON object, while the
# experiment cell indexes this structure as [trial_i][n_products] with ints —
# confirm the file layout (lists vs. objects) matches that access pattern.
with open(f"{data_folder}dict_compl.json", "r", encoding="utf-8") as graph_file:
    graph_dict_compl_dict = json.load(graph_file)

# Graph for the independent setting: one entry per product, each mapped to an
# empty list, for every product count in the sweep.
graph_dict_indep_dict = {
    prod_count: {prod: [] for prod in range(prod_count)}
    for prod_count in n_products_lst
}
        

In [None]:
# Experiment sweep.
#
# For every (horizon, #products, #baskets, environment, algorithm) combination,
# run `num_trials` trials. Each trial builds a fresh environment and agent, plays
# `horizon` rounds, and records actions, reward, regret and the elapsed time of
# the round loop. One reward figure is saved per (horizon, #products, #baskets,
# environment), as .jpg and — when tikzplotlib is usable — as .tex.

# The output folder does not depend on the loop variables: create it once.
# exist_ok=True makes this idempotent on re-runs.
result_folder = "results"
os.makedirs(result_folder, exist_ok=True)

# elapsed_time_dict[horizon][n_products][n_baskets][env_type][alg]
#   -> list with one elapsed time (seconds) per trial
elapsed_time_dict = {}

for horizon in horizon_lst:

    elapsed_time_dict[horizon] = {}

    print(f"Horizon {horizon}")

    for n_products in n_products_lst:

        print(f"\t{n_products} Products")

        elapsed_time_dict[horizon][n_products] = {}

        for n_baskets in n_baskets_lst:

            print(f"\t\t{n_baskets} Baskets")

            elapsed_time_dict[horizon][n_products][n_baskets] = {}

            for env_type in env_to_test:

                print(f"\t\t\t{env_type}")

                env_timings = {}
                elapsed_time_dict[horizon][n_products][n_baskets][env_type] = env_timings

                # NOTE(review): a new figure is opened for every configuration and
                # never closed explicitly in this cell; long sweeps keep them all
                # alive until the cell finishes.
                plt.figure()

                results_dict_lst = {alg: [] for alg in algs_to_test}

                for alg_i, alg in enumerate(algs_to_test):

                    print(f"\t\t\t\t{alg}")

                    # Plain Python floats (not NumPy scalars), one slot per trial.
                    trial_times = [0.0] * num_trials
                    env_timings[alg] = trial_times

                    for trial_i in tqdm(range(num_trials)):

                        # Graph used by the environment.
                        # NOTE(review): graph_dict_compl_dict comes from json.load and is
                        # indexed with ints here — confirm the JSON layout supports that.
                        if env_type == INDEP_ENV:
                            graph_env = graph_dict_indep_dict[n_products]
                        elif env_type == COMPL_ENV:
                            graph_env = graph_dict_compl_dict[trial_i][n_products]
                        else:
                            raise ValueError("Not recognized environment.")

                        # Graph handed to the agent: the independent one, the trial's
                        # complementary one, or None when the agent is given no graph.
                        if alg == INDEP_ALG:
                            graph_alg = graph_dict_indep_dict[n_products]
                        elif alg == COMPL_KNOWNGRAPH_ALG:
                            graph_alg = graph_dict_compl_dict[trial_i][n_products]
                        elif alg == COMPL_UNKNOWNGRAPH_ALG:
                            graph_alg = None
                        else:
                            raise NotImplementedError("Not recognized algorithm.")

                        # The trial index selects the demand slice and seeds the environment.
                        env = ComplementaryPricingEnvironment(
                            n_products, n_actions, margins_env[:n_products, :],
                            demands_compl[trial_i, :n_products, :, :], n_baskets, alpha,
                            graph_env, mc_ep=mc_ep, seed=trial_i)

                        opt = env.compute_best_action_value()

                        trial_record = {
                            REGRET: np.zeros((horizon)),
                            REWARD: np.zeros((horizon)),
                            ACTIONS: np.zeros((horizon, n_products)),
                            OPTIMUM: opt,
                        }
                        results_dict_lst[alg].append(trial_record)

                        agent = CatalogPricingAgent(
                            n_products, n_actions, margins_env[:n_products, :], alpha,
                            kernel_L, horizon, graph_dict=graph_alg)

                        # perf_counter is monotonic and high-resolution, unlike time.time(),
                        # which can jump when the system clock is adjusted.
                        # The measured span covers the whole round loop: agent pull/update
                        # as well as the environment step and reward evaluation.
                        start_time = time.perf_counter()

                        for t in range(horizon):

                            # Store the pulled action first, then reuse the stored row so
                            # the environment receives exactly what was recorded.
                            trial_record[ACTIONS][t, :] = agent.pull()
                            played_action = trial_record[ACTIONS][t, :].ravel()

                            res_mx = env.step(played_action)

                            agent.update(res_mx)

                            trial_record[REWARD][t] = env.compute_givenaction_value(played_action)

                            trial_record[REGRET][t] = opt - trial_record[REWARD][t]

                        trial_times[trial_i] = time.perf_counter() - start_time

                    make_plot(results_dict_lst[alg], num_trials, horizon, alg,
                              REWARD, alg_i, cumulative=False, reference_name=OPTIMUM,
                              plot_all_lines=False)

                # Built from adjacent f-strings: a line break inside a replacement field
                # of a single-quoted f-string is a SyntaxError before Python 3.12.
                # NOTE(review): the name embeds repr(algs_to_test), i.e. brackets, quotes
                # and backslashes — confirm this is acceptable on the target file system.
                save_path = (
                    f"{result_folder}/{env_type}_T{horizon}_prods{n_products}_bsk{n_baskets}"
                    f"_trials{num_trials}_algs{algs_to_test}"
                    f"_{datetime.datetime.now().strftime(dateformat)}"
                )

                plt.savefig(f"{save_path}.jpg")

                # Best-effort TikZ export: the import lives inside the try block so a
                # missing or failing tikzplotlib only prints a message.
                try:
                    import tikzplotlib as tkz
                    tkz.save(f"{save_path}.tex")
                except Exception as e:
                    print(f"Error: {e}")
                    print("failed to save in tikz")

In [None]:
# Runtime figures.
#
# For every (#baskets, environment, #products) combination, plot — one line per
# algorithm — the mean elapsed time per round (trial time / horizon) against the
# horizon, with a shaded band of +/- 1.96 * std / sqrt(num_trials), on a log y-axis.
color_lst = ["orange", "blue", "green", "magenta", "olive", "grey"]

# Defined here so this cell does not depend on a name that the experiment cell
# only assigns inside its loop body; exist_ok=True keeps it idempotent.
result_folder = "results"
os.makedirs(result_folder, exist_ok=True)

for n_baskets in n_baskets_lst:

    for env_type in env_to_test:

        for n_products in n_products_lst:

            plt.figure()

            for alg_i, alg in enumerate(algs_to_test):

                vect_mean = np.zeros(len(horizon_lst))
                vect_std = np.zeros(len(horizon_lst))

                for horizon_i, horizon in enumerate(horizon_lst):

                    # Per-trial elapsed time normalised by the number of rounds.
                    per_round_time = np.array(
                        elapsed_time_dict[horizon][n_products][n_baskets][env_type][alg]) / horizon
                    vect_mean[horizon_i] = np.mean(per_round_time)
                    vect_std[horizon_i] = np.std(per_round_time)

                # Half-width of the shaded band, computed once for both bounds.
                half_width = 1.96 * vect_std / np.sqrt(num_trials)

                plt.plot(horizon_lst, vect_mean, "-o", color=color_lst[alg_i], label=alg)
                plt.fill_between(horizon_lst, vect_mean - half_width,
                                 vect_mean + half_width,
                                 color=color_lst[alg_i], alpha=0.3)

            plt.legend()
            plt.yscale("log")

            # Built from adjacent f-strings: a line break inside a replacement field
            # of a single-quoted f-string is a SyntaxError before Python 3.12.
            save_path = (
                f"{result_folder}/{env_type}_prods{n_products}_bsk{n_baskets}"
                f"_trials{num_trials}_algs{algs_to_test}"
                f"_{datetime.datetime.now().strftime(dateformat)}"
            )

            plt.savefig(f"{save_path}.jpg")

            # Best-effort TikZ export: a missing or failing tikzplotlib only prints
            # a message instead of aborting the cell.
            try:
                import tikzplotlib as tkz
                tkz.save(f"{save_path}.tex")
            except Exception as e:
                print(f"Error: {e}")
                print("failed to save in tikz")

# Persist the raw per-trial timings next to the figures. The file name encodes
# the whole sweep (environments, horizons, product counts, basket counts,
# trials, algorithms) plus a timestamp. The integer dictionary keys are written
# as strings, as JSON object keys always are.
timings_path = (
    f"{result_folder}/{env_to_test}_T{horizon_lst}_prods{n_products_lst}_bsk{n_baskets_lst}"
    f"_trials{num_trials}_algs{algs_to_test}_{datetime.datetime.now().strftime(dateformat)}.json"
)
with open(timings_path, mode="w", encoding="utf-8") as time_json:
    json.dump(elapsed_time_dict, time_json, ensure_ascii=False, indent=4)