In [1]:
# (Re)load IPython's autoreload extension and switch it to mode 2 so that
# edits to imported project modules are picked up live, without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [2]:
import time
import torch
import pandas as pd
from sklearn.utils import check_random_state
from opl import OPL, evaluate_OPL_algorithm, OPL_OPFV_tune_phi
import warnings
warnings.filterwarnings('ignore')
import conf
from tqdm import tqdm
from preprocess import pre_process
from logging import getLogger
from pathlib import Path

In [3]:
# Wall-clock start of the whole run; the experiment cell subtracts this from
# its own end timestamp to report the total execution time in minutes.
time_whole_execution_start = time.time()

In [4]:
# Module-level logger; record the directory the notebook is executed from.
logger = getLogger(__name__)
logger.info(f"The current working directory is {Path.cwd()}")

# Output locations: everything goes under ./real_data, and the result tables
# (CSV files) go into its "df" subfolder, which is created on demand.
log_path = Path("./real_data")
df_path = log_path.joinpath("df")
df_path.mkdir(parents=True, exist_ok=True)

INFO:__main__:The current working directory is /Users/s23599/document/research/Sony-Non-Stationary-OPE-OPL/icml2024-opfv/src-optimize-phi/real/F-OPL-newest


In [5]:
# Parse the list-valued CSV columns with ast.literal_eval instead of eval:
# it accepts only Python literals, so a malformed or malicious cell raises
# instead of executing arbitrary code. (Imported here so this cell is
# self-contained.)
from ast import literal_eval

# If you are running locally, make sure this relative path points at your
# KuaiRec checkout. `rootpath` stays a plain string so that any other cell
# that concatenates onto it keeps working.
rootpath = "../../../../KuaiRec/"
data_dir = Path(rootpath) / "data"

# Read the CSV files
print("Loading big matrix...")
big_matrix = pd.read_csv(data_dir / "big_matrix.csv")
print("Loading small matrix...")
small_matrix = pd.read_csv(data_dir / "small_matrix.csv")

print("Loading social network...")
social_network = pd.read_csv(data_dir / "social_network.csv")
# "friend_list" is stored as the text of a Python list; turn it into a real list.
social_network["friend_list"] = social_network["friend_list"].map(literal_eval)

print("Loading item features...")
item_categories = pd.read_csv(data_dir / "item_categories.csv")
# "feat" is stored as the text of a Python list as well.
item_categories["feat"] = item_categories["feat"].map(literal_eval)

print("Loading user features...")
user_features = pd.read_csv(data_dir / "user_features.csv")

print("Loading items' daily features...")
item_daily_features = pd.read_csv(data_dir / "item_daily_features.csv")

print("All data loaded.")

Loading big matrix...
Loading small matrix...
Loading social network...
Loading item features...
Loading user features...
Loading items' daily features...
All data loaded.


In [6]:
# Set seed: seed torch and build a NumPy RandomState from the configured seed.
torch.manual_seed(conf.random_state)
random_ = check_random_state(conf.random_state)

# Cross-round accumulators. They are (re)created here so re-running this cell
# starts from a clean slate. This cell never appends to them itself; they are
# handed to OPL / evaluate_OPL_algorithm, which presumably fill them in place
# (TODO confirm against opl.py).
test_policy_value_list_DM_all_results = []
test_policy_value_list_IPS_all_results = []
test_policy_value_list_SNIPS_all_results = []
test_policy_value_list_SNDR_all_results = []
pi_learned_list_all_results = []

# The loop index gets a real name rather than `_`: it is used throughout the
# body, and assigning to `_` in IPython stops the "last output" variable from
# being updated for the rest of the session.
for seed_idx in tqdm(range(conf.n_seeds)):
    # Per-round seed: the configured base seed shifted by the round index.
    round_seed = conf.random_state + seed_idx

    print(f"\n############################################### START of ROUND {seed_idx + 1}/{conf.n_seeds} ###############################################")

    ### Preprocess ###
    time_pre_process_start = time.time()
    dataset, dataset_train, dataset_test = pre_process(
        small_matrix,
        big_matrix,
        item_categories,
        item_daily_features,
        user_features,
        social_network,
        random_state=round_seed,
        n_actions=conf.n_actions,
        dim_context=conf.dim_context,
        dim_action_context=conf.dim_action_context,
    )
    time_pre_process_end = time.time()

    elapsed_time = time_pre_process_end - time_pre_process_start
    print(f'Execution time for preprocessing = {elapsed_time / 60:.3f} mins')

    ### OPL ###
    start_time = time.time()

    # Learn one policy per method. Note that OPL receives the seed-shifted
    # value as `round`, whereas the evaluation call below receives the
    # zero-based round index.
    (
        pi_reg,
        pi_ips,
        pi_dr,
        pi_prognosticator_DM,
        pi_prognosticator_IPS,
        pi_prognosticator_SNIPS,
        pi_prognosticator_SNDR,
        pi_opfv,
    ) = OPL(
        dataset=dataset,
        dataset_test=dataset_test,
        dataset_train=dataset_train,
        time_test=dataset_test["time"],
        round=round_seed,

        flag_plot_loss=conf.flag_plot_loss,
        flag_plot_value_test=conf.flag_plot_value_test,
        flag_plot_value_train=conf.flag_plot_value_train,

        num_time_structure_for_OPFV_reward=conf.num_time_structure_for_OPFV_reward,
        phi_scalar_func_for_OPFV=conf.phi_scalar_func_for_OPFV,

        n_actions=dataset["n_actions"],
        dim_context=dataset["dim_context"],

        max_iter=conf.max_iter,
        batch_size=conf.batch_size,
        num_time_learn=conf.num_time_learn,
        pi_learned_list_all_results=pi_learned_list_all_results,
    )

    # NOTE: the phi-tuned OPFV variant is disabled in this run. To enable it,
    # call OPL_OPFV_tune_phi with the same keyword arguments as OPL above
    # (minus the three flag_plot_* flags and pi_learned_list_all_results),
    # bind the result to pi_opfv_tuned, and pass pi_opfv_tuned=pi_opfv_tuned
    # to evaluate_OPL_algorithm below.

    end_time = time.time()
    elapsed_time = end_time - start_time

    print(f"Execution time for OPL: {elapsed_time / 60:.3f} mins")

    ### Evaluate the learned policy ###
    (
        test_policy_value_list_DM,
        test_policy_value_list_IPS,
        test_policy_value_list_SNIPS,
        test_policy_value_list_SNDR,
    ) = evaluate_OPL_algorithm(
        dataset_test=dataset_test,
        pi_reg=pi_reg,
        pi_ips=pi_ips,
        pi_dr=pi_dr,

        pi_prognosticator_DM=pi_prognosticator_DM,
        pi_prognosticator_IPS=pi_prognosticator_IPS,
        pi_prognosticator_SNIPS=pi_prognosticator_SNIPS,
        pi_prognosticator_SNDR=pi_prognosticator_SNDR,
        pi_opfv=pi_opfv,

        test_policy_value_list_DM_all_results=test_policy_value_list_DM_all_results,
        test_policy_value_list_IPS_all_results=test_policy_value_list_IPS_all_results,
        test_policy_value_list_SNIPS_all_results=test_policy_value_list_SNIPS_all_results,
        test_policy_value_list_SNDR_all_results=test_policy_value_list_SNDR_all_results,
        round=seed_idx,
    )

    ### Write the result tables to the CSV files
    # The files are rewritten after every round, so an interrupted run still
    # leaves the results of all completed rounds on disk.
    # Snapshot of the learned-policy results; it is not written to disk here
    # and is kept only as a notebook variable.
    result_pi_learned = pi_learned_list_all_results.copy()

    # pd.DataFrame builds a new table from each list, so no explicit copy of
    # the accumulator is needed first.
    result_df_DM = pd.DataFrame(test_policy_value_list_DM_all_results)
    result_df_IPS = pd.DataFrame(test_policy_value_list_IPS_all_results)
    result_df_SNIPS = pd.DataFrame(test_policy_value_list_SNIPS_all_results)
    result_df_SNDR = pd.DataFrame(test_policy_value_list_SNDR_all_results)

    result_df_DM.to_csv(df_path / "result_df_DM.csv")
    result_df_IPS.to_csv(df_path / "result_df_IPS.csv")
    result_df_SNIPS.to_csv(df_path / "result_df_SNIPS.csv")
    result_df_SNDR.to_csv(df_path / "result_df_SNDR.csv")

    print(f"############################################### END of ROUND {seed_idx + 1}/{conf.n_seeds} ###############################################\n\n\n")

# Total wall-clock time, measured from the start timestamp taken in the
# earlier timing cell (time_whole_execution_start).
time_whole_execution_end = time.time()

time_whole_execution = time_whole_execution_end - time_whole_execution_start

print(f'Execution time = {time_whole_execution / 60:.3f} mins')

  0%|          | 0/10 [00:00<?, ?it/s]


############################################### START of ROUND 1/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2547
Execution time of preprocess_item_categories = 0.007 mins
Executing preprocess_item_daily_features
Execution time for preprocess_item_daily_features = 1.987 mins
Executing preprocess_user_features
Execution time for preprocess_user_features = 0.002 mins
Executing reduce_the_dim_context
The dimension of the original context = 131
The dimension of the context after PCA = 60
The cumularive explained variance for the dimension of context is 0.736
Execution time for educe_the_dim_context = 1.622 mins
Executing reduce_the_dim_action_context
The dimension of the original action context = 619
The dimension of the action context after PCA = 40
The cumularive explained variance for the dimension of action context is 0.780
Execution ti

num_features_for_Prognosticator = 3: 100%|██████████| 1/1 [01:18<00:00, 78.57s/it]
num_features_for_Prognosticator = 5: 100%|██████████| 1/1 [01:18<00:00, 78.80s/it]
num_features_for_Prognosticator = 7: 100%|██████████| 1/1 [01:18<00:00, 78.55s/it]


Prognosticator exectution time = 3.932 mins
RegressionModelTimeStructure fitting and predition time = 0.623 mins




Execution time for iter 1/25 in OPFV = 1.979 mins




Execution time for iter 2/25 in OPFV = 1.953 mins




Execution time for iter 3/25 in OPFV = 1.924 mins




Execution time for iter 4/25 in OPFV = 1.959 mins




Execution time for iter 5/25 in OPFV = 1.991 mins




Execution time for iter 6/25 in OPFV = 1.974 mins




Execution time for iter 7/25 in OPFV = 1.947 mins




Execution time for iter 8/25 in OPFV = 2.000 mins




Execution time for iter 9/25 in OPFV = 1.910 mins




Execution time for iter 10/25 in OPFV = 1.908 mins




Execution time for iter 11/25 in OPFV = 1.995 mins




Execution time for iter 12/25 in OPFV = 1.933 mins




Execution time for iter 13/25 in OPFV = 1.952 mins




Execution time for iter 14/25 in OPFV = 2.006 mins




Execution time for iter 15/25 in OPFV = 1.910 mins




Execution time for iter 16/25 in OPFV = 1.901 mins




Execution time for iter 17/25 in OPFV = 1.914 mins




Execution time for iter 18/25 in OPFV = 1.897 mins




Execution time for iter 19/25 in OPFV = 1.893 mins




Execution time for iter 20/25 in OPFV = 1.925 mins




Execution time for iter 21/25 in OPFV = 1.965 mins




Execution time for iter 22/25 in OPFV = 2.027 mins




Execution time for iter 23/25 in OPFV = 2.010 mins




Execution time for iter 24/25 in OPFV = 1.943 mins


100%|██████████| 25/25 [48:45<00:00, 117.01s/it]
 10%|█         | 1/10 [1:07:38<10:08:44, 4058.28s/it]

Execution time for iter 25/25 in OPFV = 1.937 mins
OPFV exectution time = 49.377 mins
Execution time for OPL: 56.702 mins
#################### END of OPL ####################

ROUND 1/10: test_policy_value_list_DM = {'pi_b': 0.941037128827863, 'reg': 1.7241598258246933, 'ips-pg': 0.9648640822992379, 'dr-pg': 1.113396104946275, 'prognosticator': 1.0345823073495952, 'opfv': 1.1784525045746939}
ROUND 1/10: test_policy_value_list_IPS = {'pi_b': 0.8987712526299668, 'reg': 1.6513392872469839, 'ips-pg': 13.86363852660234, 'dr-pg': 81.22161251001044, 'prognosticator': 0.9066834709895981, 'opfv': 0.8880283164463566}
ROUND 1/10: test_policy_value_list_SNIPS = {'pi_b': 0.8987712526299668, 'reg': 1.7629415399137751, 'ips-pg': 0.29827462821680606, 'dr-pg': 0.29071931061375056, 'prognosticator': 0.8937217331788664, 'opfv': 1.1118557230790824}
ROUND 1/10: test_policy_value_list_SNDR = {'pi_b': 0.9101919220640171, 'reg': 1.8407279598317294, 'ips-pg': -0.9033673349840873, 'dr-pg': -0.779021793074119, '

num_features_for_Prognosticator = 3: 100%|██████████| 1/1 [01:22<00:00, 82.44s/it]
num_features_for_Prognosticator = 5: 100%|██████████| 1/1 [01:21<00:00, 81.05s/it]
num_features_for_Prognosticator = 7: 100%|██████████| 1/1 [01:24<00:00, 84.13s/it]


Prognosticator exectution time = 4.127 mins
RegressionModelTimeStructure fitting and predition time = 0.607 mins




Execution time for iter 1/25 in OPFV = 1.766 mins




Execution time for iter 2/25 in OPFV = 1.679 mins




Execution time for iter 3/25 in OPFV = 1.764 mins




Execution time for iter 4/25 in OPFV = 1.694 mins




Execution time for iter 5/25 in OPFV = 1.775 mins




Execution time for iter 6/25 in OPFV = 1.754 mins




Execution time for iter 7/25 in OPFV = 1.731 mins




Execution time for iter 8/25 in OPFV = 1.672 mins




Execution time for iter 9/25 in OPFV = 1.681 mins




Execution time for iter 10/25 in OPFV = 1.745 mins




Execution time for iter 11/25 in OPFV = 1.798 mins




Execution time for iter 12/25 in OPFV = 1.780 mins




Execution time for iter 13/25 in OPFV = 1.779 mins




Execution time for iter 14/25 in OPFV = 1.775 mins




Execution time for iter 15/25 in OPFV = 1.750 mins




Execution time for iter 16/25 in OPFV = 1.733 mins




Execution time for iter 17/25 in OPFV = 1.706 mins




Execution time for iter 18/25 in OPFV = 1.774 mins




Execution time for iter 19/25 in OPFV = 1.712 mins




Execution time for iter 20/25 in OPFV = 1.749 mins




Execution time for iter 21/25 in OPFV = 1.722 mins




Execution time for iter 22/25 in OPFV = 1.700 mins




Execution time for iter 23/25 in OPFV = 1.802 mins




Execution time for iter 24/25 in OPFV = 1.772 mins


100%|██████████| 25/25 [43:33<00:00, 104.53s/it]
 20%|██        | 2/10 [2:04:15<8:09:15, 3669.49s/it] 

Execution time for iter 25/25 in OPFV = 1.742 mins
OPFV exectution time = 44.164 mins
Execution time for OPL: 51.661 mins
#################### END of OPL ####################

ROUND 2/10: test_policy_value_list_DM = {'pi_b': 0.9693236045334851, 'reg': 1.3058363638394457, 'ips-pg': 0.9989091753764463, 'dr-pg': 1.2598708733989814, 'prognosticator': 1.5700445703903902, 'opfv': 1.258849067612788}
ROUND 2/10: test_policy_value_list_IPS = {'pi_b': 0.9201302293023569, 'reg': 1.1227150586467711, 'ips-pg': 0.8056354422473738, 'dr-pg': 1.0845246557438648, 'prognosticator': 2.169455614987042, 'opfv': 1.0781708974816533}
ROUND 2/10: test_policy_value_list_SNIPS = {'pi_b': 0.9201302293023569, 'reg': 1.154731836299665, 'ips-pg': 0.898510933564432, 'dr-pg': 1.124051091552694, 'prognosticator': 1.8527167553842754, 'opfv': 1.1246790529479278}
ROUND 2/10: test_policy_value_list_SNDR = {'pi_b': 0.9363947569614581, 'reg': 1.2367087064754017, 'ips-pg': 0.9190795250033483, 'dr-pg': 1.2099528139663576, 'prog

num_features_for_Prognosticator = 3: 100%|██████████| 1/1 [01:08<00:00, 68.23s/it]
num_features_for_Prognosticator = 5: 100%|██████████| 1/1 [01:08<00:00, 68.66s/it]
num_features_for_Prognosticator = 7: 100%|██████████| 1/1 [01:03<00:00, 63.87s/it]


Prognosticator exectution time = 3.346 mins
RegressionModelTimeStructure fitting and predition time = 0.472 mins




Execution time for iter 1/25 in OPFV = 1.369 mins




Execution time for iter 2/25 in OPFV = 1.292 mins




Execution time for iter 3/25 in OPFV = 1.274 mins




Execution time for iter 4/25 in OPFV = 1.336 mins




Execution time for iter 5/25 in OPFV = 1.308 mins




Execution time for iter 6/25 in OPFV = 1.344 mins




Execution time for iter 7/25 in OPFV = 1.365 mins




Execution time for iter 8/25 in OPFV = 1.370 mins




Execution time for iter 9/25 in OPFV = 1.365 mins




Execution time for iter 10/25 in OPFV = 1.336 mins




Execution time for iter 11/25 in OPFV = 1.324 mins




Execution time for iter 12/25 in OPFV = 1.337 mins




Execution time for iter 13/25 in OPFV = 1.295 mins




Execution time for iter 14/25 in OPFV = 1.288 mins




Execution time for iter 15/25 in OPFV = 1.294 mins




Execution time for iter 16/25 in OPFV = 1.286 mins




Execution time for iter 17/25 in OPFV = 1.291 mins




Execution time for iter 18/25 in OPFV = 1.293 mins




Execution time for iter 19/25 in OPFV = 1.286 mins




Execution time for iter 20/25 in OPFV = 1.289 mins




Execution time for iter 21/25 in OPFV = 1.317 mins




Execution time for iter 22/25 in OPFV = 1.380 mins




Execution time for iter 23/25 in OPFV = 1.343 mins




Execution time for iter 24/25 in OPFV = 1.357 mins


100%|██████████| 25/25 [33:02<00:00, 79.29s/it]
 30%|███       | 3/10 [2:47:56<6:12:15, 3190.80s/it]

Execution time for iter 25/25 in OPFV = 1.296 mins
OPFV exectution time = 33.509 mins
Execution time for OPL: 39.599 mins
#################### END of OPL ####################

ROUND 3/10: test_policy_value_list_DM = {'pi_b': 1.0638677750912178, 'reg': 1.2636999474412187, 'ips-pg': 1.4475453844352033, 'dr-pg': 1.6517360637643246, 'prognosticator': 1.1387373313912865, 'opfv': 1.5251434142727773}
ROUND 3/10: test_policy_value_list_IPS = {'pi_b': 1.0111085283446324, 'reg': 1.0850632951733197, 'ips-pg': 1.0349341509690613, 'dr-pg': 1.5240834387403053, 'prognosticator': 0.9860143338499775, 'opfv': 1.468868895291128}
ROUND 3/10: test_policy_value_list_SNIPS = {'pi_b': 1.0111085283446324, 'reg': 1.2421447607301057, 'ips-pg': 2.313023419720975, 'dr-pg': 1.6683713379504406, 'prognosticator': 1.0677591525829335, 'opfv': 1.5038144815138628}
ROUND 3/10: test_policy_value_list_SNDR = {'pi_b': 1.0199429983504522, 'reg': 1.2400816054485626, 'ips-pg': 2.46674361018204, 'dr-pg': 1.5776142332347427, 'pro

num_features_for_Prognosticator = 3: 100%|██████████| 1/1 [01:12<00:00, 72.21s/it]
num_features_for_Prognosticator = 5: 100%|██████████| 1/1 [01:13<00:00, 73.26s/it]
num_features_for_Prognosticator = 7: 100%|██████████| 1/1 [01:12<00:00, 72.58s/it]


Prognosticator exectution time = 3.634 mins
RegressionModelTimeStructure fitting and predition time = 0.567 mins




Execution time for iter 1/25 in OPFV = 1.603 mins




Execution time for iter 2/25 in OPFV = 1.607 mins




Execution time for iter 3/25 in OPFV = 1.627 mins




Execution time for iter 4/25 in OPFV = 1.639 mins




Execution time for iter 5/25 in OPFV = 1.594 mins




Execution time for iter 6/25 in OPFV = 1.668 mins




Execution time for iter 7/25 in OPFV = 1.901 mins




Execution time for iter 8/25 in OPFV = 1.659 mins




Execution time for iter 9/25 in OPFV = 1.642 mins




Execution time for iter 10/25 in OPFV = 1.662 mins




Execution time for iter 11/25 in OPFV = 1.597 mins




Execution time for iter 12/25 in OPFV = 1.603 mins




Execution time for iter 13/25 in OPFV = 1.601 mins




Execution time for iter 14/25 in OPFV = 1.599 mins




Execution time for iter 15/25 in OPFV = 1.596 mins




Execution time for iter 16/25 in OPFV = 1.593 mins




Execution time for iter 17/25 in OPFV = 1.605 mins




Execution time for iter 18/25 in OPFV = 1.592 mins




Execution time for iter 19/25 in OPFV = 1.618 mins




Execution time for iter 20/25 in OPFV = 1.646 mins




Execution time for iter 21/25 in OPFV = 1.594 mins




Execution time for iter 22/25 in OPFV = 1.601 mins




Execution time for iter 23/25 in OPFV = 1.599 mins




Execution time for iter 24/25 in OPFV = 1.594 mins


100%|██████████| 25/25 [40:38<00:00, 97.53s/it]
 40%|████      | 4/10 [3:40:07<5:16:42, 3167.12s/it]

Execution time for iter 25/25 in OPFV = 1.599 mins
OPFV exectution time = 41.207 mins
Execution time for OPL: 47.948 mins
#################### END of OPL ####################

ROUND 4/10: test_policy_value_list_DM = {'pi_b': 0.9983321947592232, 'reg': 1.9910319772580725, 'ips-pg': 1.2427817717117053, 'dr-pg': 1.502905281382003, 'prognosticator': 2.1301942301624077, 'opfv': 2.5701934945206775}
ROUND 4/10: test_policy_value_list_IPS = {'pi_b': 0.9119361709850553, 'reg': 1.4333722455897553, 'ips-pg': 0.7399249145193137, 'dr-pg': 1.7415206429710544, 'prognosticator': 1.6977519560732588, 'opfv': 2.183408556818363}
ROUND 4/10: test_policy_value_list_SNIPS = {'pi_b': 0.9119361709850553, 'reg': 1.7444224040414906, 'ips-pg': 1.0347523333553355, 'dr-pg': 1.6352246194013533, 'prognosticator': 2.1997621073063294, 'opfv': 2.4562581362585765}
ROUND 4/10: test_policy_value_list_SNDR = {'pi_b': 0.959690713362632, 'reg': 1.8542891425222123, 'ips-pg': 0.9470360974784341, 'dr-pg': 1.6133795546921756, 'pr

num_features_for_Prognosticator = 3: 100%|██████████| 1/1 [01:30<00:00, 90.26s/it]
num_features_for_Prognosticator = 5: 100%|██████████| 1/1 [01:29<00:00, 89.49s/it]
num_features_for_Prognosticator = 7: 100%|██████████| 1/1 [01:23<00:00, 83.15s/it]


Prognosticator exectution time = 4.382 mins
RegressionModelTimeStructure fitting and predition time = 0.688 mins




Execution time for iter 1/25 in OPFV = 1.865 mins




Execution time for iter 2/25 in OPFV = 1.883 mins




Execution time for iter 3/25 in OPFV = 1.863 mins




Execution time for iter 4/25 in OPFV = 1.897 mins




Execution time for iter 5/25 in OPFV = 1.867 mins




Execution time for iter 6/25 in OPFV = 1.861 mins




Execution time for iter 7/25 in OPFV = 1.873 mins




Execution time for iter 8/25 in OPFV = 1.872 mins




Execution time for iter 9/25 in OPFV = 1.869 mins




Execution time for iter 10/25 in OPFV = 1.869 mins




Execution time for iter 11/25 in OPFV = 1.871 mins




Execution time for iter 12/25 in OPFV = 1.893 mins




Execution time for iter 13/25 in OPFV = 1.869 mins




Execution time for iter 14/25 in OPFV = 1.867 mins




Execution time for iter 15/25 in OPFV = 1.877 mins




Execution time for iter 16/25 in OPFV = 1.868 mins




Execution time for iter 17/25 in OPFV = 1.873 mins




Execution time for iter 18/25 in OPFV = 1.880 mins




Execution time for iter 19/25 in OPFV = 1.906 mins




Execution time for iter 20/25 in OPFV = 1.885 mins




Execution time for iter 21/25 in OPFV = 1.884 mins




Execution time for iter 22/25 in OPFV = 1.887 mins




Execution time for iter 23/25 in OPFV = 1.875 mins




Execution time for iter 24/25 in OPFV = 1.911 mins


100%|██████████| 25/25 [46:56<00:00, 112.66s/it]
 50%|█████     | 5/10 [4:40:37<4:37:50, 3334.00s/it]

Execution time for iter 25/25 in OPFV = 1.875 mins
OPFV exectution time = 47.630 mins
Execution time for OPL: 55.861 mins
#################### END of OPL ####################

ROUND 5/10: test_policy_value_list_DM = {'pi_b': 0.9018935188688187, 'reg': 1.325647055184574, 'ips-pg': 0.9327917263934115, 'dr-pg': 1.1444607528163446, 'prognosticator': 0.951096755866331, 'opfv': 1.5961535889209124}
ROUND 5/10: test_policy_value_list_IPS = {'pi_b': 0.8596314751321196, 'reg': 1.2139114544119385, 'ips-pg': 0.6723303182473321, 'dr-pg': 1.001938662088689, 'prognosticator': 0.7853258344556122, 'opfv': 1.5467070738931623}
ROUND 5/10: test_policy_value_list_SNIPS = {'pi_b': 0.8596314751321196, 'reg': 1.270382026111341, 'ips-pg': 0.8685661481369341, 'dr-pg': 1.1055467814518745, 'prognosticator': 0.8927193590885817, 'opfv': 1.6242110325206747}
ROUND 5/10: test_policy_value_list_SNDR = {'pi_b': 0.8697799727841974, 'reg': 1.2359272068337084, 'ips-pg': 0.8190712084912469, 'dr-pg': 1.1201411701692159, 'pro

num_features_for_Prognosticator = 3: 100%|██████████| 1/1 [01:09<00:00, 69.07s/it]
num_features_for_Prognosticator = 5: 100%|██████████| 1/1 [01:09<00:00, 69.49s/it]
num_features_for_Prognosticator = 7: 100%|██████████| 1/1 [01:11<00:00, 71.81s/it]


Prognosticator exectution time = 3.506 mins
RegressionModelTimeStructure fitting and predition time = 0.557 mins




Execution time for iter 1/25 in OPFV = 1.631 mins




Execution time for iter 2/25 in OPFV = 1.642 mins




Execution time for iter 3/25 in OPFV = 1.632 mins




Execution time for iter 4/25 in OPFV = 1.615 mins




Execution time for iter 5/25 in OPFV = 1.619 mins




Execution time for iter 6/25 in OPFV = 1.633 mins




Execution time for iter 7/25 in OPFV = 1.618 mins




Execution time for iter 8/25 in OPFV = 1.624 mins




Execution time for iter 9/25 in OPFV = 1.648 mins




Execution time for iter 10/25 in OPFV = 1.617 mins




Execution time for iter 11/25 in OPFV = 1.620 mins




Execution time for iter 12/25 in OPFV = 1.616 mins




Execution time for iter 13/25 in OPFV = 1.621 mins




Execution time for iter 14/25 in OPFV = 1.621 mins




Execution time for iter 15/25 in OPFV = 1.615 mins




Execution time for iter 16/25 in OPFV = 1.623 mins




Execution time for iter 17/25 in OPFV = 1.646 mins




Execution time for iter 18/25 in OPFV = 1.624 mins




Execution time for iter 19/25 in OPFV = 1.626 mins




Execution time for iter 20/25 in OPFV = 1.621 mins




Execution time for iter 21/25 in OPFV = 1.620 mins




Execution time for iter 22/25 in OPFV = 1.623 mins




Execution time for iter 23/25 in OPFV = 1.636 mins




Execution time for iter 24/25 in OPFV = 1.615 mins


100%|██████████| 25/25 [40:37<00:00, 97.51s/it]
 60%|██████    | 6/10 [5:32:21<3:37:03, 3255.86s/it]

Execution time for iter 25/25 in OPFV = 1.623 mins
OPFV exectution time = 41.187 mins
Execution time for OPL: 47.780 mins
#################### END of OPL ####################

ROUND 6/10: test_policy_value_list_DM = {'pi_b': 0.9438716327481742, 'reg': 1.2755702381413316, 'ips-pg': 0.8245269922086045, 'dr-pg': 1.2830090324431873, 'prognosticator': 1.4135631672311606, 'opfv': 1.2822215697769788}
ROUND 6/10: test_policy_value_list_IPS = {'pi_b': 0.8917415515848629, 'reg': 1.1333909294093278, 'ips-pg': 0.28590867737573356, 'dr-pg': 1.1601352966716085, 'prognosticator': 1.0241669012845962, 'opfv': 1.158317927670073}
ROUND 6/10: test_policy_value_list_SNIPS = {'pi_b': 0.8917415515848629, 'reg': 1.1937544400952964, 'ips-pg': 1.2755464382208233, 'dr-pg': 1.199928883739306, 'prognosticator': 1.3473192974139638, 'opfv': 1.2002535646894281}
ROUND 6/10: test_policy_value_list_SNDR = {'pi_b': 0.9018475004263488, 'reg': 1.1645159324793977, 'ips-pg': 0.6674932650930676, 'dr-pg': 1.1742530478718072, '

num_features_for_Prognosticator = 3: 100%|██████████| 1/1 [01:04<00:00, 64.13s/it]
num_features_for_Prognosticator = 5: 100%|██████████| 1/1 [01:05<00:00, 65.15s/it]
num_features_for_Prognosticator = 7: 100%|██████████| 1/1 [01:05<00:00, 65.96s/it]


Prognosticator exectution time = 3.254 mins
RegressionModelTimeStructure fitting and predition time = 0.454 mins




Execution time for iter 1/25 in OPFV = 1.411 mins




Execution time for iter 2/25 in OPFV = 1.411 mins




Execution time for iter 3/25 in OPFV = 1.411 mins




Execution time for iter 4/25 in OPFV = 1.410 mins




Execution time for iter 5/25 in OPFV = 1.406 mins




Execution time for iter 6/25 in OPFV = 1.408 mins




Execution time for iter 7/25 in OPFV = 1.417 mins




Execution time for iter 8/25 in OPFV = 1.411 mins




Execution time for iter 9/25 in OPFV = 1.428 mins




Execution time for iter 10/25 in OPFV = 1.417 mins




Execution time for iter 11/25 in OPFV = 1.404 mins




Execution time for iter 12/25 in OPFV = 1.403 mins




Execution time for iter 13/25 in OPFV = 1.413 mins




Execution time for iter 14/25 in OPFV = 1.409 mins




Execution time for iter 15/25 in OPFV = 1.406 mins




Execution time for iter 16/25 in OPFV = 1.411 mins




Execution time for iter 17/25 in OPFV = 1.414 mins




Execution time for iter 18/25 in OPFV = 1.419 mins




Execution time for iter 19/25 in OPFV = 1.431 mins




Execution time for iter 20/25 in OPFV = 1.414 mins




Execution time for iter 21/25 in OPFV = 1.416 mins




Execution time for iter 22/25 in OPFV = 1.412 mins




Execution time for iter 23/25 in OPFV = 1.415 mins




Execution time for iter 24/25 in OPFV = 1.408 mins


100%|██████████| 25/25 [35:18<00:00, 84.76s/it]

Execution time for iter 25/25 in OPFV = 1.411 mins
OPFV exectution time = 35.772 mins
Execution time for OPL: 41.713 mins
#################### END of OPL ####################

ROUND 7/10: test_policy_value_list_DM = {'pi_b': 0.8794112001142762, 'reg': 1.2017549287090759, 'ips-pg': 0.8733931680905946, 'dr-pg': 0.9374715365892202, 'prognosticator': 0.910069821911282, 'opfv': 1.1907411061442181}
ROUND 7/10: test_policy_value_list_IPS = {'pi_b': 0.8335465486822332, 'reg': 1.063021493976164, 'ips-pg': 0.6418260224493788, 'dr-pg': 0.9819125184839932, 'prognosticator': 0.9566824493566969, 'opfv': 1.1684822397352757}
ROUND 7/10: test_policy_value_list_SNIPS = {'pi_b': 0.8335465486822332, 'reg': 1.1394597039510523, 'ips-pg': 0.7988971664190847, 'dr-pg': 0.7509870255393987, 'prognosticator': 0.8404003394948715, 'opfv': 1.125497967415854}
ROUND 7/10: test_policy_value_list_SNDR = {'pi_b': 0.8411495459754138, 'reg': 1.1623904373077936, 'ips-pg': 0.8021930284690203, 'dr-pg': 0.3191315569876408, 'pr


 70%|███████   | 7/10 [6:17:52<2:34:13, 3084.36s/it]


############################################### START of ROUND 8/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2547
Execution time of preprocess_item_categories = 0.007 mins
Executing preprocess_item_daily_features
Execution time for preprocess_item_daily_features = 1.583 mins
Executing preprocess_user_features
Execution time for preprocess_user_features = 0.002 mins
Executing reduce_the_dim_context
The dimension of the original context = 131
The dimension of the context after PCA = 60
The cumularive explained variance for the dimension of context is 0.736
Execution time for educe_the_dim_context = 0.071 mins
Executing reduce_the_dim_action_context
The dimension of the original action context = 619
The dimension of the action context after PCA = 40
The cumularive explained variance for the dimension of action context is 0.819
Execution ti

num_features_for_Prognosticator = 3: 100%|██████████| 1/1 [01:05<00:00, 65.10s/it]
num_features_for_Prognosticator = 5: 100%|██████████| 1/1 [01:05<00:00, 65.98s/it]
num_features_for_Prognosticator = 7: 100%|██████████| 1/1 [01:06<00:00, 66.18s/it]


Prognosticator exectution time = 3.288 mins
RegressionModelTimeStructure fitting and predition time = 0.491 mins




Execution time for iter 1/25 in OPFV = 1.477 mins




Execution time for iter 2/25 in OPFV = 1.485 mins




Execution time for iter 3/25 in OPFV = 1.507 mins




Execution time for iter 4/25 in OPFV = 1.483 mins




Execution time for iter 5/25 in OPFV = 1.481 mins




Execution time for iter 6/25 in OPFV = 1.487 mins




Execution time for iter 7/25 in OPFV = 1.482 mins




Execution time for iter 8/25 in OPFV = 1.479 mins




Execution time for iter 9/25 in OPFV = 1.487 mins




Execution time for iter 10/25 in OPFV = 1.480 mins




Execution time for iter 11/25 in OPFV = 1.486 mins




Execution time for iter 12/25 in OPFV = 1.505 mins




Execution time for iter 13/25 in OPFV = 1.483 mins




Execution time for iter 14/25 in OPFV = 1.478 mins




Execution time for iter 15/25 in OPFV = 1.480 mins




Execution time for iter 16/25 in OPFV = 1.499 mins




Execution time for iter 17/25 in OPFV = 1.480 mins




Execution time for iter 18/25 in OPFV = 1.478 mins




Execution time for iter 19/25 in OPFV = 1.510 mins




Execution time for iter 20/25 in OPFV = 1.485 mins




Execution time for iter 21/25 in OPFV = 1.477 mins




Execution time for iter 22/25 in OPFV = 1.487 mins




Execution time for iter 23/25 in OPFV = 1.483 mins




Execution time for iter 24/25 in OPFV = 1.480 mins


100%|██████████| 25/25 [37:08<00:00, 89.15s/it]
 80%|████████  | 8/10 [7:05:25<1:40:21, 3010.57s/it]

Execution time for iter 25/25 in OPFV = 1.488 mins
OPFV exectution time = 37.639 mins
Execution time for OPL: 43.661 mins
#################### END of OPL ####################

ROUND 8/10: test_policy_value_list_DM = {'pi_b': 0.9870948089595878, 'reg': 1.4972753251833428, 'ips-pg': 1.3527450347847676, 'dr-pg': 1.5129153112005806, 'prognosticator': 1.2974958822055425, 'opfv': 1.5095235450927624}
ROUND 8/10: test_policy_value_list_IPS = {'pi_b': 0.9310088725878963, 'reg': 1.0690260742408146, 'ips-pg': 0.8175104049525852, 'dr-pg': 1.0648176967609913, 'prognosticator': 1.1711240729972643, 'opfv': 1.099049429658022}
ROUND 8/10: test_policy_value_list_SNIPS = {'pi_b': 0.9310088725878963, 'reg': 1.4056058600545727, 'ips-pg': 1.4132918457885542, 'dr-pg': 1.4378018619056179, 'prognosticator': 1.316316498737678, 'opfv': 1.4339035900295898}
ROUND 8/10: test_policy_value_list_SNDR = {'pi_b': 0.9543845925314234, 'reg': 1.3624621368165133, 'ips-pg': 1.4497040093743057, 'dr-pg': 1.3946651874241995, 'p

num_features_for_Prognosticator = 3: 100%|██████████| 1/1 [01:01<00:00, 61.74s/it]
num_features_for_Prognosticator = 5: 100%|██████████| 1/1 [01:02<00:00, 62.08s/it]
num_features_for_Prognosticator = 7: 100%|██████████| 1/1 [01:01<00:00, 61.46s/it]


Prognosticator exectution time = 3.088 mins
RegressionModelTimeStructure fitting and predition time = 0.493 mins




Execution time for iter 1/25 in OPFV = 1.362 mins




Execution time for iter 2/25 in OPFV = 1.357 mins




Execution time for iter 3/25 in OPFV = 1.353 mins




Execution time for iter 4/25 in OPFV = 1.359 mins




Execution time for iter 5/25 in OPFV = 1.358 mins




Execution time for iter 6/25 in OPFV = 1.358 mins




Execution time for iter 7/25 in OPFV = 1.360 mins




Execution time for iter 8/25 in OPFV = 1.362 mins




Execution time for iter 9/25 in OPFV = 1.382 mins




Execution time for iter 10/25 in OPFV = 1.345 mins




Execution time for iter 11/25 in OPFV = 1.349 mins




Execution time for iter 12/25 in OPFV = 1.356 mins




Execution time for iter 13/25 in OPFV = 1.371 mins




Execution time for iter 14/25 in OPFV = 1.353 mins




Execution time for iter 15/25 in OPFV = 1.352 mins




Execution time for iter 16/25 in OPFV = 1.356 mins




Execution time for iter 17/25 in OPFV = 1.354 mins




Execution time for iter 18/25 in OPFV = 1.360 mins




Execution time for iter 19/25 in OPFV = 1.353 mins




Execution time for iter 20/25 in OPFV = 1.355 mins




Execution time for iter 21/25 in OPFV = 1.382 mins




Execution time for iter 22/25 in OPFV = 1.365 mins




Execution time for iter 23/25 in OPFV = 1.362 mins




Execution time for iter 24/25 in OPFV = 1.358 mins


100%|██████████| 25/25 [33:58<00:00, 81.55s/it]

Execution time for iter 25/25 in OPFV = 1.356 mins
OPFV exectution time = 34.475 mins
Execution time for OPL: 40.142 mins



 90%|█████████ | 9/10 [7:49:23<48:13, 2893.95s/it]  

#################### END of OPL ####################

ROUND 9/10: test_policy_value_list_DM = {'pi_b': 0.9509455342956529, 'reg': 1.1901425673917352, 'ips-pg': 0.9410937070614238, 'dr-pg': 1.2318066169296968, 'prognosticator': 1.1476094838174127, 'opfv': 1.231629971266394}
ROUND 9/10: test_policy_value_list_IPS = {'pi_b': 0.9111042825650352, 'reg': 1.043108219161466, 'ips-pg': 0.6816104221603475, 'dr-pg': 1.1803294487386322, 'prognosticator': 1.0155597129640355, 'opfv': 1.1788434778790797}
ROUND 9/10: test_policy_value_list_SNIPS = {'pi_b': 0.9111042825650352, 'reg': 1.1818299812752338, 'ips-pg': 0.9275837117948145, 'dr-pg': 1.180589541124282, 'prognosticator': 1.1504739937941124, 'opfv': 1.1813657584972197}
ROUND 9/10: test_policy_value_list_SNDR = {'pi_b': 0.9137950208109735, 'reg': 1.1674020496617394, 'ips-pg': 0.9162231082719846, 'dr-pg': 1.1881405661263944, 'prognosticator': 1.1029560450399223, 'opfv': 1.1894851303254137}
############################################### END of ROUN

num_features_for_Prognosticator = 3: 100%|██████████| 1/1 [01:03<00:00, 63.45s/it]
num_features_for_Prognosticator = 5: 100%|██████████| 1/1 [01:05<00:00, 65.39s/it]
num_features_for_Prognosticator = 7: 100%|██████████| 1/1 [01:06<00:00, 66.29s/it]


Prognosticator exectution time = 3.252 mins
RegressionModelTimeStructure fitting and predition time = 0.487 mins




Execution time for iter 1/25 in OPFV = 1.405 mins




Execution time for iter 2/25 in OPFV = 1.404 mins




Execution time for iter 3/25 in OPFV = 1.408 mins




Execution time for iter 4/25 in OPFV = 1.413 mins




Execution time for iter 5/25 in OPFV = 1.410 mins




Execution time for iter 6/25 in OPFV = 1.414 mins




Execution time for iter 7/25 in OPFV = 1.464 mins




Execution time for iter 8/25 in OPFV = 1.419 mins




Execution time for iter 9/25 in OPFV = 1.439 mins




Execution time for iter 10/25 in OPFV = 1.420 mins




Execution time for iter 11/25 in OPFV = 1.462 mins




Execution time for iter 12/25 in OPFV = 1.429 mins




Execution time for iter 13/25 in OPFV = 1.407 mins




Execution time for iter 14/25 in OPFV = 1.429 mins




Execution time for iter 15/25 in OPFV = 1.423 mins




Execution time for iter 16/25 in OPFV = 1.416 mins




Execution time for iter 17/25 in OPFV = 1.398 mins




Execution time for iter 18/25 in OPFV = 1.398 mins




Execution time for iter 19/25 in OPFV = 1.410 mins




Execution time for iter 20/25 in OPFV = 1.418 mins




Execution time for iter 21/25 in OPFV = 1.413 mins




Execution time for iter 22/25 in OPFV = 1.419 mins




Execution time for iter 23/25 in OPFV = 1.410 mins




Execution time for iter 24/25 in OPFV = 1.400 mins


100%|██████████| 25/25 [35:26<00:00, 85.04s/it]
100%|██████████| 10/10 [8:34:45<00:00, 3088.56s/it]

Execution time for iter 25/25 in OPFV = 1.406 mins
OPFV exectution time = 35.922 mins
Execution time for OPL: 41.797 mins
#################### END of OPL ####################

ROUND 10/10: test_policy_value_list_DM = {'pi_b': 0.8424887314666999, 'reg': 1.5715857883189235, 'ips-pg': 1.2419674821981237, 'dr-pg': 1.5892156410146825, 'prognosticator': 1.3509442076124702, 'opfv': 1.5903845809851012}
ROUND 10/10: test_policy_value_list_IPS = {'pi_b': 0.7948299560759896, 'reg': 1.3663214815772187, 'ips-pg': 0.4384038235713545, 'dr-pg': 1.2884327948480163, 'prognosticator': 1.1907405194032772, 'opfv': 1.2744288129966694}
ROUND 10/10: test_policy_value_list_SNIPS = {'pi_b': 0.7948299560759896, 'reg': 1.4588580711007244, 'ips-pg': 1.0179232909673972, 'dr-pg': 1.4627182630468856, 'prognosticator': 1.3396632521065488, 'opfv': 1.4595642067363535}
ROUND 10/10: test_policy_value_list_SNDR = {'pi_b': 0.8054617163829249, 'reg': 1.4226546885474338, 'ips-pg': 1.1134269017062866, 'dr-pg': 1.43297323841331




In [7]:
### Write the result tables to the CSV files
# (disabled) result_pi_learned = pi_learned_list_all_results.copy()

# One table per evaluation estimator (DM, IPS, SNIPS, SNDR). Each table is
# built from a shallow copy of the results accumulated by the experiment
# loop, so the accumulator objects themselves are left untouched.
# NOTE(review): presumably each accumulator is a list with one
# {policy name -> value} dict per round, giving one row per round -- confirm.
result_df_DM = pd.DataFrame(test_policy_value_list_DM_all_results.copy())
result_df_IPS = pd.DataFrame(test_policy_value_list_IPS_all_results.copy())
result_df_SNIPS = pd.DataFrame(test_policy_value_list_SNIPS_all_results.copy())
result_df_SNDR = pd.DataFrame(test_policy_value_list_SNDR_all_results.copy())

# Persist every table under df_path. to_csv is called with its defaults, so
# the row index is written as the first (unnamed) column of each file.
result_df_DM.to_csv(path_or_buf=df_path.joinpath("result_df_DM.csv"))
result_df_IPS.to_csv(path_or_buf=df_path.joinpath("result_df_IPS.csv"))
result_df_SNIPS.to_csv(path_or_buf=df_path.joinpath("result_df_SNIPS.csv"))
result_df_SNDR.to_csv(path_or_buf=df_path.joinpath("result_df_SNDR.csv"))