In [None]:
#Dependencies

import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import torch
import itertools
import matplotlib.pyplot as plt
import time
import math
import collections

import json 
import pickle

from src.environment.ml_env import OfflineEnv, OfflineFairEnv
from src.model.recommender import DRRAgent, FairRecAgent

import obp
from sklearn.linear_model import LogisticRegression
from obp.ope import (
    OffPolicyEvaluation, 
    RegressionModel,
    InverseProbabilityWeighting as IPS,
    SelfNormalizedInverseProbabilityWeighting as SNIPS,
    DirectMethod as DM,
    DoublyRobust as DR,
    DoublyRobustWithShrinkage as DRos,
)

In [None]:
# Index file that maps each artefact name to the pickle holding it.
dataset_path = "data/movie_lens_100k_output_path.json"
with open(dataset_path) as index_file:
    _dataset_path = json.load(index_file)

# Artefacts used by the evaluation cells, loaded in a fixed order.
# NOTE: pickle.load executes arbitrary code; only point this at trusted files.
_DATASET_KEYS = (
    "eval_users_dict",
    "eval_users_dict_positive_items",
    "eval_users_history_lens",
    "users_history_lens",
    "movies_groups",
)

dataset = {}
for _key in _DATASET_KEYS:
    with open(_dataset_path[_key], "rb") as pkl_file:
        dataset[_key] = pickle.load(pkl_file)

In [None]:
# Registries keyed by algorithm name: which environment class and which
# agent class to instantiate for a given `algorithm` setting.
ENV = {
    "drr": OfflineEnv,
    "fairrec": OfflineFairEnv,
}
AGENT = {
    "drr": DRRAgent,
    "fairrec": FairRecAgent,
}

In [None]:
# Identifiers of the stored training runs. Other cells select a run by
# position, so the order below must not change.
train_ids = (
    # Runs whose identifier has no "_fair" tag.
    [
        "movie_lens_100k_2021-10-26_11-45-48",
        "movie_lens_100k_2021-10-24_01-42-57",  # long training
    ]
    # Runs whose identifier carries the "_fair" tag.
    + [
        "movie_lens_100k_fair_2021-10-26_11-45-51",
        "movie_lens_100k_fair_2021-10-24_01-41-02",  # long training
    ]
)

In [None]:
# --- Which trained run to evaluate -----------------------------------------
algorithm = "fairrec"
train_version = "movie_lens_100k_fair"
train_id = train_ids[2]
output_path = f"model/{train_version}/{train_id}"

# --- Dataset dimensions ----------------------------------------------------
users_num = 943
items_num = 1682

# --- State representation --------------------------------------------------
state_size = 5
srm_size = 2

# --- Network / optimisation hyper-parameters -------------------------------
embedding_dim = 50
actor_hidden_dim = 512
actor_learning_rate = 1e-4
critic_hidden_dim = 512
critic_learning_rate = 1e-3
discount_factor = 0.9
tau = 0.01
replay_memory_size = 1_000_000
batch_size = 64

# --- Pre-trained embeddings ------------------------------------------------
emb_model = "user_movie"
embedding_network_weights = "model/pmf/emb_50_ratio_0.800000_bs_1000_e_258_wd_0.100000_lr_0.000100_trained_pmf.pt"

# --- Fairness settings: one constraint weight per item group ---------------
n_groups = 10
fairness_constraints = [1.0] * n_groups


# --- Evaluation settings ---------------------------------------------------
top_k = [5, 10]
done_count = 10

In [None]:
def _latest_checkpoint(model_dir, prefix):
    """Return the largest step number among checkpoint files in ``model_dir``.

    A checkpoint file is any entry whose name starts with ``prefix`` (for
    example ``"actor_"``) and whose text between the first underscore and the
    first following dot parses as an integer, e.g. ``actor_1500.h5`` -> 1500.
    Entries that match the prefix but carry a non-numeric step are ignored
    instead of aborting the whole lookup.

    Args:
        model_dir: Directory to scan.
        prefix: File-name prefix identifying the checkpoint family.

    Returns:
        The highest step number found, as an ``int``.

    Raises:
        FileNotFoundError: If no file with a numeric step matches ``prefix``.
    """
    steps = []
    for fname in os.listdir(model_dir):
        if not fname.startswith(prefix):
            continue
        # "actor_1500.h5" -> "1500"
        step_text = fname.split("_")[1].split(".")[0]
        try:
            steps.append(int(step_text))
        except ValueError:
            # e.g. "actor_best.h5": not a numbered checkpoint, skip it.
            continue
    if not steps:
        raise FileNotFoundError(
            "No '{}<step>' checkpoint found in {!r}".format(prefix, model_dir)
        )
    return max(steps)


# Pick the most recent (highest-numbered) weights for each network.
actor_checkpoint = _latest_checkpoint(output_path, "actor_")
critic_checkpoint = _latest_checkpoint(output_path, "critic_")

print(actor_checkpoint, critic_checkpoint)

In [None]:
# Build the evaluation environment for the selected algorithm; it is used
# here to obtain the list of users that can be evaluated.
env_cls = ENV[algorithm]
env = env_cls(
    users_dict=dataset["eval_users_dict"],
    users_history_lens=dataset["eval_users_history_lens"],
    n_groups=n_groups,
    movies_groups=dataset["movies_groups"],
    state_size=state_size,
    done_count=done_count,
    fairness_constraints=fairness_constraints,
)
available_users = env.available_users

# Agent settings, grouped by concern and merged at the call site.
_data_kwargs = dict(
    users_num=users_num,
    items_num=items_num,
    genres_num=0,
    movies_genres_id={},
    srm_size=srm_size,
    state_size=state_size,
)
_network_kwargs = dict(
    embedding_dim=embedding_dim,
    actor_hidden_dim=actor_hidden_dim,
    actor_learning_rate=actor_learning_rate,
    critic_hidden_dim=critic_hidden_dim,
    critic_learning_rate=critic_learning_rate,
    discount_factor=discount_factor,
    tau=tau,
    replay_memory_size=replay_memory_size,
    batch_size=batch_size,
)
_artifact_kwargs = dict(
    train_version=train_version,
    is_test=True,
    model_path=output_path,
    emb_model=emb_model,
    embedding_network_weights_path=embedding_network_weights,
)
_fairness_kwargs = dict(
    n_groups=n_groups,
    fairness_constraints=fairness_constraints,
)

agent_cls = AGENT[algorithm]
recommender = agent_cls(
    env=env,
    **_data_kwargs,
    **_network_kwargs,
    **_artifact_kwargs,
    **_fairness_kwargs,
)

# Restore the weights of the checkpoints selected earlier.
actor_weights_path = os.path.join(output_path, f"actor_{actor_checkpoint}.h5")
critic_weights_path = os.path.join(output_path, f"critic_{critic_checkpoint}.h5")
recommender.load_model(actor_weights_path, critic_weights_path)
# Evaluate the loaded policy for every cut-off in `top_k`, one user at a time.
#
# The original cell passed `top_k=K`, but no cell defines `K`, so it failed
# with a NameError on a fresh kernel, and the configured `top_k` list was never
# used. It also overwrote the returned metrics on every iteration, leaving
# nothing to inspect afterwards.
# NOTE(review): this assumes `recommender.evaluate` expects a single integer
# cut-off per call; confirm against its signature.
#
# eval_results[K] holds the raw per-user values returned by `evaluate`, in the
# same order as `available_users`.
eval_results = {}
for K in top_k:
    per_user_metrics = {"precision": [], "ndcg": [], "propfair": []}

    for user_id in tqdm(available_users, desc="top-{}".format(K)):
        # A fresh environment pinned to this user, so episodes do not share state.
        eval_env = ENV[algorithm](
            users_dict=dataset["eval_users_dict"],
            users_history_lens=dataset["eval_users_history_lens"],
            n_groups=n_groups,
            movies_groups=dataset["movies_groups"],
            state_size=state_size,
            done_count=done_count,
            fairness_constraints=fairness_constraints,
            fix_user_id=user_id
        )

        recommender.env = eval_env
        available_items = set(eval_env.user_items.keys())

        precision, ndcg, propfair = recommender.evaluate(
            eval_env, top_k=K, available_items=available_items
        )
        per_user_metrics["precision"].append(precision)
        per_user_metrics["ndcg"].append(ndcg)
        per_user_metrics["propfair"].append(propfair)

        del eval_env

    eval_results[K] = per_user_metrics

In [None]:
## OPE using validation data
# NOTE(review): `validation_bandit_data` (logged bandit feedback) and
# `eg_action_dist` (action distribution of the policy under evaluation) are not
# created by any visible cell; both must be defined before this cell can run.
#
# The original read `dataset.n_actions`, but `dataset` in this notebook is a
# plain dict of pickled artefacts, so that attribute access raises
# AttributeError. The action count is taken from the bandit feedback instead.
# NOTE(review): assumes the feedback dict carries an "n_actions" entry, as
# OBP-generated feedback does; confirm for hand-built feedback.
OPE_SEED = 12345  # shared seed so the reward model and the cross-fitting split are reproducible

# Reward model q(x, a) used by the model-based estimators (DM, DR, DRos).
regression_model = RegressionModel(
    n_actions=validation_bandit_data["n_actions"],
    base_model=LogisticRegression(C=100, max_iter=10000, random_state=OPE_SEED),
)
estimated_rewards = regression_model.fit_predict(
    context=validation_bandit_data["context"], # context; x
    action=validation_bandit_data["action"], # action; a
    reward=validation_bandit_data["reward"], # reward; r
    n_folds=2, # 2-fold cross fitting
    random_state=OPE_SEED,
)

# Estimators to compare on the same logged data.
ope = OffPolicyEvaluation(
    bandit_feedback=validation_bandit_data,
    ope_estimators=[
        IPS(estimator_name="IPS"),
        DM(estimator_name="DM"),
        IPS(lambda_=5, estimator_name="CIPS"),  # IPS with lambda_=5, reported as "CIPS"
        SNIPS(estimator_name="SNIPS"),
        DR(estimator_name="DR"),
        DRos(lambda_=500, estimator_name="DRos"),
    ]
)

estimated_policy_value = ope.estimate_policy_values(
    action_dist=eg_action_dist,
    estimated_rewards_by_reg_model=estimated_rewards,
)
estimated_policy_value