In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported project
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import ast
import time
import warnings
from logging import getLogger
from pathlib import Path

# Silence warnings up front so the imports below and the long run stay readable.
warnings.filterwarnings('ignore')

import torch
import pandas as pd
from sklearn.utils import check_random_state
from tqdm import tqdm

from opl import OPL, evaluate_OPL_algorithm, OPL_OPFV_tune_phi
import conf
from preprocess import pre_process

In [3]:
# Wall-clock start of the whole run; a later time.time() reading is subtracted
# from this value to report the total execution time.
time_whole_execution_start = time.time() 

In [4]:
# Module-level logger; record where the notebook is being executed from.
logger = getLogger(__name__)
logger.info(f"The current working directory is {Path.cwd()}")

# Output locations: result tables are written under <log_path>/df.
log_path = Path("./real_data")
df_path = log_path.joinpath("df")
# Create the directory tree if needed; an already-existing directory is fine.
df_path.mkdir(parents=True, exist_ok=True)

INFO:__main__:The current working directory is /Users/a81808/document/OPFV/icml2024-opfv/src-optimize-phi/real/F-OPL-newest


In [5]:
# Location of the KuaiRec dataset. The path is relative, so it is resolved
# against the notebook's current working directory.
rootpath="../../../../KuaiRec/"
data_dir = Path(rootpath) / "data"

# Read the CSV files
print("Loading big matrix...")
big_matrix = pd.read_csv(data_dir / "big_matrix.csv")
print("Loading small matrix...")
small_matrix = pd.read_csv(data_dir / "small_matrix.csv")

print("Loading social network...")
social_network = pd.read_csv(data_dir / "social_network.csv")
# The column holds Python literals serialized as text (presumably lists -- confirm
# against the CSV). ast.literal_eval parses literals only, unlike eval(), which
# would execute any expression found in the file.
social_network["friend_list"] = social_network["friend_list"].map(ast.literal_eval)

print("Loading item features...")
item_categories = pd.read_csv(data_dir / "item_categories.csv")
# Same literal-only parsing for the serialized "feat" column.
item_categories["feat"] = item_categories["feat"].map(ast.literal_eval)

print("Loading user features...")
user_features = pd.read_csv(data_dir / "user_features.csv")

print("Loading items' daily features...")
item_daily_features = pd.read_csv(data_dir / "item_daily_features.csv")

print("All data loaded.")

Loading big matrix...
Loading small matrix...
Loading social network...
Loading item features...
Loading user features...
Loading items' daily features...
All data loaded.


In [6]:
# Seed torch's global RNG and build a random state object from the configured seed.
torch.manual_seed(conf.random_state)
random_ = check_random_state(conf.random_state)

# Per-estimator accumulators passed to evaluate_OPL_algorithm on every round
# (presumably it appends that round's values in place -- confirm in opl.py).
test_policy_value_list_DM_all_results = []
test_policy_value_list_IPS_all_results = []
test_policy_value_list_SNIPS_all_results = []
test_policy_value_list_SNDR_all_results = []
pi_learned_list_all_results = []

# A named loop variable is used instead of `_`: the index is read throughout the
# body, and in IPython `_` normally refers to the last cell output.
for round_idx in tqdm(range(conf.n_seeds)):
    # Seed used for this round's preprocessing and policy learning.
    round_seed = conf.random_state + round_idx

    print(f"\n############################################### START of ROUND {round_idx + 1}/{conf.n_seeds} ###############################################")

    ### Preprocess ###
    time_pre_process_start = time.time()
    dataset, dataset_train, dataset_test = pre_process(
        small_matrix, big_matrix, item_categories, item_daily_features, user_features, social_network,
        random_state = round_seed,
        n_actions = conf.n_actions,
        dim_context = conf.dim_context,
        dim_action_context = conf.dim_action_context,
    )
    time_pre_process_end = time.time()

    elapsed_time = time_pre_process_end - time_pre_process_start
    print(f'Execution time for preprocessing = {elapsed_time / 60:.3f} mins') 

    ### OPL ###
    start_time = time.time()

    pi_opfv_tuned = OPL_OPFV_tune_phi(
        dataset = dataset,
        dataset_test = dataset_test,
        dataset_train = dataset_train,
        time_test = dataset_test["time"],
        round = round_seed,

        num_time_structure_for_OPFV_reward = conf.num_time_structure_for_OPFV_reward,
        phi_scalar_func_for_OPFV = conf.phi_scalar_func_for_OPFV,

        n_actions = dataset["n_actions"],
        dim_context = dataset["dim_context"],

        max_iter = conf.max_iter,
        batch_size = conf.batch_size,
        num_time_learn = conf.num_time_learn,
    )

    end_time = time.time()
    elapsed_time = end_time - start_time

    print(f"Execution time for OPL: {elapsed_time / 60:.3f} mins")

    ### Evaluate the learned policy ###
    # Only the tuned OPFV policy is learned in this notebook, so it is the only
    # policy passed in. Note that `round` here is the 0-based round index, not
    # the seed.
    (
        test_policy_value_list_DM,
        test_policy_value_list_IPS,
        test_policy_value_list_SNIPS,
        test_policy_value_list_SNDR,
    ) = evaluate_OPL_algorithm(
        dataset_test = dataset_test,
        pi_opfv_tuned = pi_opfv_tuned,

        test_policy_value_list_DM_all_results = test_policy_value_list_DM_all_results,
        test_policy_value_list_IPS_all_results = test_policy_value_list_IPS_all_results,
        test_policy_value_list_SNIPS_all_results = test_policy_value_list_SNIPS_all_results,
        test_policy_value_list_SNDR_all_results = test_policy_value_list_SNDR_all_results,
        round = round_idx,
    )

    ### Write the result tables to the CSV files
    # Checkpoint after every round: each round takes hours, so the results so
    # far are rewritten to disk in case the run is interrupted.
    for estimator, rows in (
        ("DM", test_policy_value_list_DM_all_results),
        ("IPS", test_policy_value_list_IPS_all_results),
        ("SNIPS", test_policy_value_list_SNIPS_all_results),
        ("SNDR", test_policy_value_list_SNDR_all_results),
    ):
        pd.DataFrame(rows).to_csv(df_path / f"result_df_{estimator}_opfv_tuned.csv")

    print(f"############################################### END of ROUND {round_idx + 1}/{conf.n_seeds} ###############################################\n\n\n")

# Total wall-clock time, measured from the start timestamp taken in an earlier cell.
time_whole_execution_end = time.time()

time_whole_execution = time_whole_execution_end - time_whole_execution_start

print(f'Execution time = {time_whole_execution / 60:.3f} mins')

  0%|          | 0/10 [00:00<?, ?it/s]


############################################### START of ROUND 1/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2547
Execution time of preprocess_item_categories = 0.006 mins
Executing preprocess_item_daily_features
Execution time for preprocess_item_daily_features = 1.304 mins
Executing preprocess_user_features
Execution time for preprocess_user_features = 0.002 mins
Executing reduce_the_dim_context
The dimension of the original context = 131
The dimension of the context after PCA = 60
The cumularive explained variance for the dimension of context is 0.736
Execution time for educe_the_dim_context = 0.041 mins
Executing reduce_the_dim_action_context
The dimension of the original action context = 619
The dimension of the action context after PCA = 40
The cumularive explained variance for the dimension of action context is 0.780
Execution ti



Execution time for iter 1/25 in OPFV = 13.287 mins




Execution time for iter 2/25 in OPFV = 13.177 mins




Execution time for iter 3/25 in OPFV = 13.232 mins




Execution time for iter 4/25 in OPFV = 13.196 mins




Execution time for iter 5/25 in OPFV = 13.252 mins




Execution time for iter 6/25 in OPFV = 13.289 mins




Execution time for iter 7/25 in OPFV = 13.230 mins




Execution time for iter 8/25 in OPFV = 13.219 mins




Execution time for iter 9/25 in OPFV = 13.182 mins




Execution time for iter 10/25 in OPFV = 13.016 mins




Execution time for iter 11/25 in OPFV = 13.026 mins




Execution time for iter 12/25 in OPFV = 13.010 mins




Execution time for iter 13/25 in OPFV = 13.144 mins




Execution time for iter 14/25 in OPFV = 13.080 mins




Execution time for iter 15/25 in OPFV = 13.252 mins




Execution time for iter 16/25 in OPFV = 13.315 mins




Execution time for iter 17/25 in OPFV = 13.162 mins




Execution time for iter 18/25 in OPFV = 13.180 mins




Execution time for iter 19/25 in OPFV = 13.218 mins




Execution time for iter 20/25 in OPFV = 13.239 mins




Execution time for iter 21/25 in OPFV = 13.213 mins




Execution time for iter 22/25 in OPFV = 13.194 mins




Execution time for iter 23/25 in OPFV = 13.179 mins




Execution time for iter 24/25 in OPFV = 13.222 mins


100%|██████████| 25/25 [5:29:44<00:00, 791.38s/it]
 10%|█         | 1/10 [5:33:47<50:04:03, 20027.06s/it]

Execution time for iter 25/25 in OPFV = 13.226 mins
OPFV (tuned) exectution time = 329.742 mins
Execution time for OPL: 330.312 mins
#################### END of OPL ####################

ROUND 1/10: test_policy_value_list_DM = {'pi_b': 0.941037128827863, 'opfv (tuned)': 1.0437826671901833}
ROUND 1/10: test_policy_value_list_IPS = {'pi_b': 0.8987712526299668, 'opfv (tuned)': 0.7688803548800757}
ROUND 1/10: test_policy_value_list_SNIPS = {'pi_b': 0.8987712526299668, 'opfv (tuned)': 1.0060234964626154}
ROUND 1/10: test_policy_value_list_SNDR = {'pi_b': 0.9101919220640172, 'opfv (tuned)': 0.9811716901993927}
############################################### END of ROUND 1/10 ###############################################




############################################### START of ROUND 2/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2547
Execu



Execution time for iter 1/25 in OPFV = 11.247 mins




Execution time for iter 2/25 in OPFV = 11.208 mins




Execution time for iter 3/25 in OPFV = 11.271 mins




Execution time for iter 4/25 in OPFV = 11.304 mins




Execution time for iter 5/25 in OPFV = 11.296 mins




Execution time for iter 6/25 in OPFV = 11.301 mins




Execution time for iter 7/25 in OPFV = 11.305 mins




Execution time for iter 8/25 in OPFV = 11.323 mins




Execution time for iter 9/25 in OPFV = 11.246 mins




Execution time for iter 10/25 in OPFV = 11.321 mins




Execution time for iter 11/25 in OPFV = 11.250 mins




Execution time for iter 12/25 in OPFV = 11.300 mins




Execution time for iter 13/25 in OPFV = 11.317 mins




Execution time for iter 14/25 in OPFV = 11.274 mins




Execution time for iter 15/25 in OPFV = 11.338 mins




Execution time for iter 16/25 in OPFV = 11.253 mins




Execution time for iter 17/25 in OPFV = 11.322 mins




Execution time for iter 18/25 in OPFV = 11.257 mins




Execution time for iter 19/25 in OPFV = 11.296 mins




Execution time for iter 20/25 in OPFV = 11.298 mins




Execution time for iter 21/25 in OPFV = 11.298 mins




Execution time for iter 22/25 in OPFV = 11.291 mins




Execution time for iter 23/25 in OPFV = 11.232 mins




Execution time for iter 24/25 in OPFV = 11.269 mins


100%|██████████| 25/25 [4:42:06<00:00, 677.06s/it]
 20%|██        | 2/10 [10:19:38<40:44:42, 18335.28s/it]

Execution time for iter 25/25 in OPFV = 11.291 mins
OPFV (tuned) exectution time = 282.109 mins
Execution time for OPL: 282.588 mins
#################### END of OPL ####################

ROUND 2/10: test_policy_value_list_DM = {'pi_b': 0.9693236045334851, 'opfv (tuned)': 1.2628709744568185}
ROUND 2/10: test_policy_value_list_IPS = {'pi_b': 0.9201302293023569, 'opfv (tuned)': 1.0811078800020473}
ROUND 2/10: test_policy_value_list_SNIPS = {'pi_b': 0.9201302293023569, 'opfv (tuned)': 1.1234387805292954}
ROUND 2/10: test_policy_value_list_SNDR = {'pi_b': 0.9363947569614581, 'opfv (tuned)': 1.2139522231627002}
############################################### END of ROUND 2/10 ###############################################




############################################### START of ROUND 3/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2547
Exec



Execution time for iter 1/25 in OPFV = 8.946 mins




Execution time for iter 2/25 in OPFV = 8.914 mins




Execution time for iter 3/25 in OPFV = 8.917 mins




Execution time for iter 4/25 in OPFV = 8.963 mins




Execution time for iter 5/25 in OPFV = 8.948 mins




Execution time for iter 6/25 in OPFV = 9.163 mins




Execution time for iter 7/25 in OPFV = 9.156 mins




Execution time for iter 8/25 in OPFV = 9.314 mins




Execution time for iter 9/25 in OPFV = 9.208 mins




Execution time for iter 10/25 in OPFV = 8.860 mins




Execution time for iter 11/25 in OPFV = 8.806 mins




Execution time for iter 12/25 in OPFV = 8.921 mins




Execution time for iter 13/25 in OPFV = 8.989 mins




Execution time for iter 14/25 in OPFV = 8.981 mins




Execution time for iter 15/25 in OPFV = 8.896 mins




Execution time for iter 16/25 in OPFV = 8.845 mins




Execution time for iter 17/25 in OPFV = 9.286 mins




Execution time for iter 18/25 in OPFV = 8.983 mins




Execution time for iter 19/25 in OPFV = 9.013 mins




Execution time for iter 20/25 in OPFV = 9.007 mins




Execution time for iter 21/25 in OPFV = 8.957 mins




Execution time for iter 22/25 in OPFV = 8.977 mins




Execution time for iter 23/25 in OPFV = 8.943 mins




Execution time for iter 24/25 in OPFV = 8.991 mins


100%|██████████| 25/25 [3:44:55<00:00, 539.81s/it]
 30%|███       | 3/10 [14:07:41<31:31:17, 16211.12s/it]

Execution time for iter 25/25 in OPFV = 8.935 mins
OPFV (tuned) exectution time = 224.921 mins
Execution time for OPL: 225.257 mins
#################### END of OPL ####################

ROUND 3/10: test_policy_value_list_DM = {'pi_b': 1.0638677750912178, 'opfv (tuned)': 1.5268852225564373}
ROUND 3/10: test_policy_value_list_IPS = {'pi_b': 1.0111085283446324, 'opfv (tuned)': 1.4648905277558437}
ROUND 3/10: test_policy_value_list_SNIPS = {'pi_b': 1.0111085283446324, 'opfv (tuned)': 1.490655946630267}
ROUND 3/10: test_policy_value_list_SNDR = {'pi_b': 1.0199429983504522, 'opfv (tuned)': 1.4745494226519187}
############################################### END of ROUND 3/10 ###############################################




############################################### START of ROUND 4/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2547
Execut



Execution time for iter 1/25 in OPFV = 10.745 mins




Execution time for iter 2/25 in OPFV = 10.756 mins




Execution time for iter 3/25 in OPFV = 10.754 mins




Execution time for iter 4/25 in OPFV = 10.821 mins




Execution time for iter 5/25 in OPFV = 10.732 mins




Execution time for iter 6/25 in OPFV = 10.759 mins




Execution time for iter 7/25 in OPFV = 10.756 mins




Execution time for iter 8/25 in OPFV = 10.791 mins




Execution time for iter 9/25 in OPFV = 10.751 mins




Execution time for iter 10/25 in OPFV = 10.750 mins




Execution time for iter 11/25 in OPFV = 10.786 mins




Execution time for iter 12/25 in OPFV = 10.875 mins




Execution time for iter 13/25 in OPFV = 10.929 mins




Execution time for iter 14/25 in OPFV = 10.783 mins




Execution time for iter 15/25 in OPFV = 10.772 mins




Execution time for iter 16/25 in OPFV = 10.871 mins




Execution time for iter 17/25 in OPFV = 10.794 mins




Execution time for iter 18/25 in OPFV = 10.934 mins




Execution time for iter 19/25 in OPFV = 10.983 mins




Execution time for iter 20/25 in OPFV = 10.852 mins




Execution time for iter 21/25 in OPFV = 10.788 mins




Execution time for iter 22/25 in OPFV = 10.858 mins




Execution time for iter 23/25 in OPFV = 10.755 mins




Execution time for iter 24/25 in OPFV = 10.818 mins


100%|██████████| 25/25 [4:30:12<00:00, 648.48s/it]
 40%|████      | 4/10 [18:41:48<27:10:26, 16304.42s/it]

Execution time for iter 25/25 in OPFV = 10.789 mins
OPFV (tuned) exectution time = 270.203 mins
Execution time for OPL: 270.675 mins
#################### END of OPL ####################

ROUND 4/10: test_policy_value_list_DM = {'pi_b': 0.9983321947592232, 'opfv (tuned)': 2.5496551856755727}
ROUND 4/10: test_policy_value_list_IPS = {'pi_b': 0.9119361709850553, 'opfv (tuned)': 2.188665842296368}
ROUND 4/10: test_policy_value_list_SNIPS = {'pi_b': 0.9119361709850553, 'opfv (tuned)': 2.4431528254116843}
ROUND 4/10: test_policy_value_list_SNDR = {'pi_b': 0.959690713362632, 'opfv (tuned)': 2.5112115647523034}
############################################### END of ROUND 4/10 ###############################################




############################################### START of ROUND 5/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2547
Execut



Execution time for iter 1/25 in OPFV = 12.973 mins




Execution time for iter 2/25 in OPFV = 13.155 mins




Execution time for iter 3/25 in OPFV = 13.138 mins




Execution time for iter 4/25 in OPFV = 13.154 mins




Execution time for iter 5/25 in OPFV = 13.136 mins




Execution time for iter 6/25 in OPFV = 13.144 mins




Execution time for iter 7/25 in OPFV = 13.115 mins




Execution time for iter 8/25 in OPFV = 13.211 mins




Execution time for iter 9/25 in OPFV = 976.494 mins




Execution time for iter 10/25 in OPFV = 13.217 mins




Execution time for iter 11/25 in OPFV = 13.113 mins




Execution time for iter 12/25 in OPFV = 13.116 mins




Execution time for iter 13/25 in OPFV = 13.142 mins




Execution time for iter 14/25 in OPFV = 13.161 mins




Execution time for iter 15/25 in OPFV = 13.117 mins




Execution time for iter 16/25 in OPFV = 13.122 mins




Execution time for iter 17/25 in OPFV = 13.151 mins




Execution time for iter 18/25 in OPFV = 13.173 mins




Execution time for iter 19/25 in OPFV = 13.156 mins




Execution time for iter 20/25 in OPFV = 13.170 mins




Execution time for iter 21/25 in OPFV = 13.138 mins




Execution time for iter 22/25 in OPFV = 13.172 mins




Execution time for iter 23/25 in OPFV = 13.227 mins




Execution time for iter 24/25 in OPFV = 13.158 mins


100%|██████████| 25/25 [21:32:01<00:00, 3100.85s/it]
 50%|█████     | 5/10 [40:18:13<53:26:12, 38474.45s/it]

Execution time for iter 25/25 in OPFV = 13.164 mins
OPFV (tuned) exectution time = 1292.021 mins
Execution time for OPL: 1292.554 mins
#################### END of OPL ####################

ROUND 5/10: test_policy_value_list_DM = {'pi_b': 0.9018935188688187, 'opfv (tuned)': 2.0129213008400217}
ROUND 5/10: test_policy_value_list_IPS = {'pi_b': 0.8596314751321196, 'opfv (tuned)': 1.954288192173232}
ROUND 5/10: test_policy_value_list_SNIPS = {'pi_b': 0.8596314751321196, 'opfv (tuned)': 2.151346264735297}
ROUND 5/10: test_policy_value_list_SNDR = {'pi_b': 0.8697799727841974, 'opfv (tuned)': 2.1870913340338194}
############################################### END of ROUND 5/10 ###############################################




############################################### START of ROUND 6/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2548
Exec



Execution time for iter 1/25 in OPFV = 11.667 mins




Execution time for iter 2/25 in OPFV = 11.645 mins




Execution time for iter 3/25 in OPFV = 11.695 mins




Execution time for iter 4/25 in OPFV = 11.711 mins




Execution time for iter 5/25 in OPFV = 11.727 mins




Execution time for iter 6/25 in OPFV = 11.755 mins




Execution time for iter 7/25 in OPFV = 11.738 mins




Execution time for iter 8/25 in OPFV = 11.705 mins




Execution time for iter 9/25 in OPFV = 11.723 mins




Execution time for iter 10/25 in OPFV = 11.671 mins




Execution time for iter 11/25 in OPFV = 11.685 mins




Execution time for iter 12/25 in OPFV = 11.615 mins




Execution time for iter 13/25 in OPFV = 11.678 mins




Execution time for iter 14/25 in OPFV = 11.686 mins




Execution time for iter 15/25 in OPFV = 11.694 mins




Execution time for iter 16/25 in OPFV = 11.672 mins




Execution time for iter 17/25 in OPFV = 11.664 mins




Execution time for iter 18/25 in OPFV = 11.675 mins




Execution time for iter 19/25 in OPFV = 11.687 mins




Execution time for iter 20/25 in OPFV = 11.730 mins




Execution time for iter 21/25 in OPFV = 11.705 mins




Execution time for iter 22/25 in OPFV = 11.707 mins




Execution time for iter 23/25 in OPFV = 11.731 mins




Execution time for iter 24/25 in OPFV = 11.691 mins


100%|██████████| 25/25 [4:52:21<00:00, 701.66s/it]
 60%|██████    | 6/10 [45:14:15<34:55:28, 31432.19s/it]

Execution time for iter 25/25 in OPFV = 11.702 mins
OPFV (tuned) exectution time = 292.361 mins
Execution time for OPL: 292.813 mins
#################### END of OPL ####################

ROUND 6/10: test_policy_value_list_DM = {'pi_b': 0.9120616797704034, 'opfv (tuned)': 1.334388695716544}
ROUND 6/10: test_policy_value_list_IPS = {'pi_b': 0.8735387716834242, 'opfv (tuned)': 1.2871623841844828}
ROUND 6/10: test_policy_value_list_SNIPS = {'pi_b': 0.8735387716834242, 'opfv (tuned)': 1.2350597191426598}
ROUND 6/10: test_policy_value_list_SNDR = {'pi_b': 0.8801689456358612, 'opfv (tuned)': 1.2525223334576738}
############################################### END of ROUND 6/10 ###############################################




############################################### START of ROUND 7/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2548
Execu



Execution time for iter 1/25 in OPFV = 9.428 mins




Execution time for iter 2/25 in OPFV = 9.458 mins




Execution time for iter 3/25 in OPFV = 9.472 mins




Execution time for iter 4/25 in OPFV = 9.499 mins




Execution time for iter 5/25 in OPFV = 9.528 mins




Execution time for iter 6/25 in OPFV = 9.481 mins




Execution time for iter 7/25 in OPFV = 9.494 mins




Execution time for iter 8/25 in OPFV = 9.503 mins




Execution time for iter 9/25 in OPFV = 9.493 mins




Execution time for iter 10/25 in OPFV = 9.513 mins




Execution time for iter 11/25 in OPFV = 9.494 mins




Execution time for iter 12/25 in OPFV = 9.533 mins




Execution time for iter 13/25 in OPFV = 9.448 mins




Execution time for iter 14/25 in OPFV = 9.483 mins




Execution time for iter 15/25 in OPFV = 9.516 mins




Execution time for iter 16/25 in OPFV = 9.478 mins




Execution time for iter 17/25 in OPFV = 9.454 mins




Execution time for iter 18/25 in OPFV = 9.486 mins




Execution time for iter 19/25 in OPFV = 9.512 mins




Execution time for iter 20/25 in OPFV = 9.475 mins




Execution time for iter 21/25 in OPFV = 9.470 mins




Execution time for iter 22/25 in OPFV = 9.508 mins




Execution time for iter 23/25 in OPFV = 9.490 mins




Execution time for iter 24/25 in OPFV = 9.471 mins


100%|██████████| 25/25 [3:57:09<00:00, 569.19s/it]
 70%|███████   | 7/10 [49:14:46<21:33:43, 25874.34s/it]

Execution time for iter 25/25 in OPFV = 9.473 mins
OPFV (tuned) exectution time = 237.162 mins
Execution time for OPL: 237.562 mins
#################### END of OPL ####################

ROUND 7/10: test_policy_value_list_DM = {'pi_b': 0.8220767625855312, 'opfv (tuned)': 1.2946919321957093}
ROUND 7/10: test_policy_value_list_IPS = {'pi_b': 0.7763966474489367, 'opfv (tuned)': 1.1537120279944075}
ROUND 7/10: test_policy_value_list_SNIPS = {'pi_b': 0.7763966474489367, 'opfv (tuned)': 1.1928035369401573}
ROUND 7/10: test_policy_value_list_SNDR = {'pi_b': 0.7858073786699357, 'opfv (tuned)': 1.1047720775881908}
############################################### END of ROUND 7/10 ###############################################




############################################### START of ROUND 8/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2548
Execu



Execution time for iter 1/25 in OPFV = 12.404 mins




Execution time for iter 2/25 in OPFV = 12.496 mins




Execution time for iter 3/25 in OPFV = 12.470 mins




Execution time for iter 4/25 in OPFV = 12.452 mins




Execution time for iter 5/25 in OPFV = 12.518 mins




Execution time for iter 6/25 in OPFV = 12.482 mins




Execution time for iter 7/25 in OPFV = 12.513 mins




Execution time for iter 8/25 in OPFV = 12.456 mins




Execution time for iter 9/25 in OPFV = 12.481 mins




Execution time for iter 10/25 in OPFV = 12.500 mins




Execution time for iter 11/25 in OPFV = 12.516 mins




Execution time for iter 12/25 in OPFV = 12.488 mins




Execution time for iter 13/25 in OPFV = 12.492 mins




Execution time for iter 14/25 in OPFV = 12.454 mins




Execution time for iter 15/25 in OPFV = 12.463 mins




Execution time for iter 16/25 in OPFV = 12.479 mins




Execution time for iter 17/25 in OPFV = 12.492 mins




Execution time for iter 18/25 in OPFV = 12.431 mins




Execution time for iter 19/25 in OPFV = 12.459 mins




Execution time for iter 20/25 in OPFV = 12.508 mins




Execution time for iter 21/25 in OPFV = 12.495 mins




Execution time for iter 22/25 in OPFV = 12.469 mins




Execution time for iter 23/25 in OPFV = 12.495 mins




Execution time for iter 24/25 in OPFV = 12.512 mins


100%|██████████| 25/25 [5:12:00<00:00, 748.83s/it]
 80%|████████  | 8/10 [54:30:43<13:09:04, 23672.17s/it]

Execution time for iter 25/25 in OPFV = 12.485 mins
OPFV (tuned) exectution time = 312.012 mins
Execution time for OPL: 312.519 mins
#################### END of OPL ####################

ROUND 8/10: test_policy_value_list_DM = {'pi_b': 1.0254309917429087, 'opfv (tuned)': 2.181875756158991}
ROUND 8/10: test_policy_value_list_IPS = {'pi_b': 0.9754373607348054, 'opfv (tuned)': 1.7790224195052315}
ROUND 8/10: test_policy_value_list_SNIPS = {'pi_b': 0.9754373607348054, 'opfv (tuned)': 2.644396023087988}
ROUND 8/10: test_policy_value_list_SNDR = {'pi_b': 0.9809746803590634, 'opfv (tuned)': 1.1137164140249651}
############################################### END of ROUND 8/10 ###############################################




############################################### START of ROUND 9/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2548
Execut



Execution time for iter 1/25 in OPFV = 9.680 mins




Execution time for iter 2/25 in OPFV = 9.727 mins




Execution time for iter 3/25 in OPFV = 9.702 mins




Execution time for iter 4/25 in OPFV = 9.674 mins




Execution time for iter 5/25 in OPFV = 9.710 mins




Execution time for iter 6/25 in OPFV = 9.729 mins




Execution time for iter 7/25 in OPFV = 9.672 mins




Execution time for iter 8/25 in OPFV = 9.751 mins




Execution time for iter 9/25 in OPFV = 9.752 mins




Execution time for iter 10/25 in OPFV = 9.731 mins




Execution time for iter 11/25 in OPFV = 9.736 mins




Execution time for iter 12/25 in OPFV = 9.732 mins




Execution time for iter 13/25 in OPFV = 9.735 mins




Execution time for iter 14/25 in OPFV = 9.752 mins




Execution time for iter 15/25 in OPFV = 9.730 mins




Execution time for iter 16/25 in OPFV = 9.692 mins




Execution time for iter 17/25 in OPFV = 9.726 mins




Execution time for iter 18/25 in OPFV = 9.787 mins




Execution time for iter 19/25 in OPFV = 9.731 mins




Execution time for iter 20/25 in OPFV = 9.732 mins




Execution time for iter 21/25 in OPFV = 9.750 mins




Execution time for iter 22/25 in OPFV = 9.666 mins




Execution time for iter 23/25 in OPFV = 9.799 mins




Execution time for iter 24/25 in OPFV = 9.740 mins


100%|██████████| 25/25 [4:03:10<00:00, 583.64s/it]
 90%|█████████ | 9/10 [58:37:22<5:48:18, 20898.28s/it] 

Execution time for iter 25/25 in OPFV = 9.746 mins
OPFV (tuned) exectution time = 243.184 mins
Execution time for OPL: 243.586 mins
#################### END of OPL ####################

ROUND 9/10: test_policy_value_list_DM = {'pi_b': 0.8625899555271078, 'opfv (tuned)': 0.720461679407531}
ROUND 9/10: test_policy_value_list_IPS = {'pi_b': 0.8155045617723397, 'opfv (tuned)': 0.6898033004689228}
ROUND 9/10: test_policy_value_list_SNIPS = {'pi_b': 0.8155045617723397, 'opfv (tuned)': 0.687117304400665}
ROUND 9/10: test_policy_value_list_SNDR = {'pi_b': 0.8275253478036803, 'opfv (tuned)': 0.6775686964101393}
############################################### END of ROUND 9/10 ###############################################




############################################### START of ROUND 10/10 ###############################################

#################### START of preprocessing ####################
Maximum number of unique actions that we can use for training and test data = 2548
Execut



Execution time for iter 1/25 in OPFV = 10.352 mins




Execution time for iter 2/25 in OPFV = 10.331 mins




Execution time for iter 3/25 in OPFV = 10.349 mins




Execution time for iter 4/25 in OPFV = 10.368 mins




Execution time for iter 5/25 in OPFV = 10.286 mins




Execution time for iter 6/25 in OPFV = 10.359 mins




Execution time for iter 7/25 in OPFV = 10.290 mins




Execution time for iter 8/25 in OPFV = 10.312 mins




Execution time for iter 9/25 in OPFV = 10.373 mins




Execution time for iter 10/25 in OPFV = 10.316 mins




Execution time for iter 11/25 in OPFV = 10.383 mins




Execution time for iter 12/25 in OPFV = 10.356 mins




Execution time for iter 13/25 in OPFV = 10.307 mins




Execution time for iter 14/25 in OPFV = 10.369 mins




Execution time for iter 15/25 in OPFV = 10.348 mins




Execution time for iter 16/25 in OPFV = 10.379 mins




Execution time for iter 17/25 in OPFV = 10.390 mins




Execution time for iter 18/25 in OPFV = 10.313 mins




Execution time for iter 19/25 in OPFV = 10.311 mins




Execution time for iter 20/25 in OPFV = 10.356 mins




Execution time for iter 21/25 in OPFV = 10.338 mins




Execution time for iter 22/25 in OPFV = 10.351 mins




Execution time for iter 23/25 in OPFV = 10.324 mins




Execution time for iter 24/25 in OPFV = 10.410 mins


100%|██████████| 25/25 [4:18:38<00:00, 620.73s/it]
100%|██████████| 10/10 [62:59:22<00:00, 22676.27s/it] 

Execution time for iter 25/25 in OPFV = 10.368 mins
OPFV (tuned) exectution time = 258.638 mins
Execution time for OPL: 259.064 mins
#################### END of OPL ####################

ROUND 10/10: test_policy_value_list_DM = {'pi_b': 0.9701250557479605, 'opfv (tuned)': 1.2141077958692346}
ROUND 10/10: test_policy_value_list_IPS = {'pi_b': 0.9255632286169481, 'opfv (tuned)': 0.9414382516529288}
ROUND 10/10: test_policy_value_list_SNIPS = {'pi_b': 0.9255632286169481, 'opfv (tuned)': 1.013646146078603}
ROUND 10/10: test_policy_value_list_SNDR = {'pi_b': 0.9357688409696717, 'opfv (tuned)': 1.1056077511614633}
############################################### END of ROUND 10/10 ###############################################



Execution time = 3779.516 mins





In [7]:
### Write the result tables to the CSV files
# Build one DataFrame per estimator from a shallow copy of its accumulated rows.
result_df_DM = pd.DataFrame(list(test_policy_value_list_DM_all_results))
result_df_IPS = pd.DataFrame(list(test_policy_value_list_IPS_all_results))
result_df_SNIPS = pd.DataFrame(list(test_policy_value_list_SNIPS_all_results))
result_df_SNDR = pd.DataFrame(list(test_policy_value_list_SNDR_all_results))

# Persist each table under df_path, in the same order and with the same file names.
for csv_stem, frame in (
    ("result_df_DM", result_df_DM),
    ("result_df_IPS", result_df_IPS),
    ("result_df_SNIPS", result_df_SNIPS),
    ("result_df_SNDR", result_df_SNDR),
):
    frame.to_csv(df_path / f"{csv_stem}_opfv_tuned.csv")