# Online Evaluation

We pretrain a probabilistic matrix factorization (PMF) model as the environment simulator, i.e., to predict the feedback for items that the user has never rated. The online evaluation procedure follows the Training Algorithm, i.e., the parameters are continuously updated during the online evaluation stage. The major difference is that the feedback for a recommended item is observed through the environment simulator. 

In [1]:
import sys 
sys.path.append('..')

#Dependencies
import os
import json 
import yaml
import pickle
from random import sample
from tqdm import tqdm

import pandas as pd

from src.environment import OfflineEnv, OfflineFairEnv
from src.model.recommender import DRRAgent, FairRecAgent

from src.recsys_fair_metrics.recsys_fair import RecsysFair

from IPython.display import clear_output
import recmetrics as rm


# Lookup tables keyed by algorithm name: the environment class and the agent
# class that get instantiated for that algorithm in the evaluation cell.
ENV = {"drr": OfflineEnv, "fairrec": OfflineFairEnv}
AGENT = {"drr": DRRAgent, "fairrec": FairRecAgent}

import torch

# Ask PyTorch's CUDA caching allocator to release its unused cached blocks
# before the evaluation starts.
torch.cuda.empty_cache()

In [2]:
# Name of the dataset to evaluate; it also selects the JSON manifest that
# lists where each pre-processed artifact of that dataset is stored.
dataset_name = "yelp_pd"
dataset_path = f"../data/{dataset_name}_output_path.json"
with open(dataset_path) as manifest_file:
    _dataset_path = json.load(manifest_file)

# Every path in the manifest is resolved relative to the parent directory.
# NOTE(review): pickle.load executes arbitrary code from the file; only point
# this notebook at artifacts produced by this project.
dataset = {}
for pickled_key in (
    "eval_users_dict",
    "eval_users_history_lens",
    "users_history_lens",
    "item_groups",
):
    with open(os.path.join("..", _dataset_path[pickled_key]), "rb") as handle:
        dataset[pickled_key] = pickle.load(handle)

for csv_key in ("items_df", "items_metadata"):
    dataset[csv_key] = pd.read_csv(os.path.join("..", _dataset_path[csv_key]))

# Flatten the item -> group mapping into a two-column frame and keep the list
# of distinct item ids as the recommendation catalog.
item_groups_df = pd.DataFrame(
    dataset["item_groups"].items(),
    columns=["item_id", "group"],
)
catalog = item_groups_df["item_id"].unique().tolist()

# Actor-Critic Models

In [4]:
# Registry of trained DRR runs: short integer index -> run identifier.
# The selected identifier is used further down as the last component of the
# model output directory ("../model/<train_version>/<train_id>").
drr_train_ids = {
    0: "movie_lens_1m_2022-04-06_11-29-07", # drr paper
    1: "movie_lens_1m_2022-04-05_16-33-30", # drr ceia

    2: "movie_lens_100k_2022-04-07_15-40-33", # drr paper
    3: "movie_lens_100k_2022-04-07_15-39-45", # drr ceia
    4: "movie_lens_100k_2022-02-01_15-35-49", # drr ceia

    5: "movie_lens_100k_2022-07-06_11-14-27",
    6: "movie_lens_100k_2022-07-06_12-19-24",

    7: "yelp_pd_2022-07-06_18-02-45",
    8: "yelp_pd_2022-07-06_18-03-18",
    9: "yelp_pd_2022-07-06_22-30-51"
    
}

# Registry of trained FairRec runs: string key -> run identifier.
# Keys are prefixed by experiment family ("p-" paper, "a-" adaptive,
# "c-" combining), so unlike drr_train_ids they are strings, not integers.
# NOTE(review): the trailing numbers in the comments (0.5, 0.55, ...) are
# presumably a threshold hyperparameter of the run -- TODO confirm.
fairrec_train_ids = {   
    # Paper
    "p-0": "movie_lens_1m_fair_2022-04-05_21-33-12",
    "p-1": "movie_lens_100k_fair_2022-04-07_22-03-32",
    "p-2": "movie_lens_100k_fair_2022-04-18_16-24-27",
    "p-3": "movie_lens_100k_fair_2022-04-20_18-52-34",
    "p-4": "movie_lens_100k_fair_2022-02-01_18-49-14", 

    "p-5": "yelp_ca_fair_2022-06-16_12-33-16",
    "p-6": "yelp_tn_fair_2022-06-16_15-27-06",
    "p-7": "yelp_fl_fair_2022-06-16_16-37-20",
    "p-8": "yelp_pa_fair_2022-06-16_17-42-17",
    "p-9": "yelp_pd_fair_2022-06-23_18-10-53",

    # Adaptive
    "a-0": "movie_lens_100k_fair_2022-02-03_14-10-48",
    "a-1": "movie_lens_100k_fair_2022-02-04_09-59-46",

    "a-2": "movie_lens_1m_fair_2022-04-06_09-10-12",
    "a-3": "movie_lens_100k_fair_2022-04-07_16-52-53",
    "a-4": "movie_lens_100k_fair_2022-04-14_09-55-40", # 0.6
    "a-5": "movie_lens_100k_fair_2022-04-14_15-13-32", # 0.55
    "a-6": "movie_lens_100k_fair_2022-04-18_11-03-25", # genre 0.55
    "a-7": "movie_lens_100k_fair_2022-04-18_11-06-22", # title 0.55
    "a-8": "movie_lens_100k_fair_2022-04-19_09-14-01", # genre 0.5
    "a-9": "movie_lens_100k_fair_2022-04-19_09-24-16", # title 0.76
    "a-10": "movie_lens_100k_fair_2022-04-20_18-51-43", # genre 0.58

    "a-11": "movie_lens_100k_fair_2022-04-25_22-49-01", # genre + title v3 0.5
    "a-12": "movie_lens_100k_fair_2022-04-25_22-53-17", # title v3 0.5
    "a-13": "movie_lens_100k_fair_2022-04-26_07-39-21", # genre 0.5

    "a-14": "movie_lens_100k_fair_2022-04-26_12-27-11", # title v2 0.5
    "a-15": "movie_lens_100k_fair_2022-04-26_12-28-13", # genre + title v2 0.5

    "a-16": "movie_lens_100k_fair_2022-04-29_12-36-02", # genre + title v2 0.5
    "a-17": "movie_lens_100k_fair_2022-05-05_11-28-10", # genre + title v2 0.51
    "a-18": "movie_lens_100k_fair_2022-05-05_14-54-51", # genre + title v2 0.52
    "a-19": "movie_lens_100k_fair_2022-05-05_18-11-37", # genre + title v2 0.53
    "a-20": "movie_lens_100k_fair_2022-05-05_21-23-33", # genre + title v2 0.54
    "a-21": "movie_lens_100k_fair_2022-05-06_00-32-23", # genre + title v2 0.55
    "a-22": "movie_lens_100k_fair_2022-05-05_11-28-46", # genre + title v2 0.56
    "a-23": "movie_lens_100k_fair_2022-05-05_14-51-34", # genre + title v2 0.57
    "a-24": "movie_lens_100k_fair_2022-05-05_18-06-41", # genre + title v2 0.58
    "a-25": "movie_lens_100k_fair_2022-05-05_21-21-03", # genre + title v2 0.59
    "a-26": "movie_lens_100k_fair_2022-05-06_00-29-50", # genre + title v2 0.6
    "a-27": "movie_lens_100k_fair_2022-05-06_03-47-52", # genre + title v2 0.61
    "a-28": "movie_lens_100k_fair_2022-05-06_06-54-07", # genre + title v2 0.62
    "a-29": "movie_lens_100k_fair_2022-05-06_03-47-59", # genre + title v2 0.63
    "a-30": "movie_lens_100k_fair_2022-05-06_06-54-00", # genre + title v2 0.64
    "a-31": "movie_lens_100k_fair_2022-05-06_10-09-00", # genre + title v2 0.65
    "a-32": "movie_lens_100k_fair_2022-05-06_10-12-07", # genre + title v2 0.66
   
    # Combining
    "c-0": "movie_lens_1m_fair_2022-04-05_23-46-04",
    "c-1": "movie_lens_100k_fair_2022-04-07_17-56-50",
    "c-2": "movie_lens_100k_fair_2022-04-24_17-38-08", # genre
    "c-3": "movie_lens_100k_fair_2022-04-24_17-38-39", # title v1
    "c-4": "movie_lens_100k_fair_2022-04-24_21-58-41", # title v2
    "c-5": "movie_lens_100k_fair_2022-04-24_22-01-39", # genre + title v2 

    "c-6": "movie_lens_100k_fair_2022-04-25_19-31-54", # title v3
    "c-7": "movie_lens_100k_fair_2022-04-25_19-32-40", # genre + title v3
    "c-8": "movie_lens_100k_fair_2022-04-26_17-45-20", # item genre bert
    "c-9": "movie_lens_100k_fair_2022-04-26_17-48-20", # item genre date
   
}

# --- Run selection -----------------------------------------------------------
# `idx` picks the trained run to evaluate and `algorithm` picks which registry
# it is looked up in. drr_train_ids uses integer keys while fairrec_train_ids
# uses string keys such as "p-9", so `idx` has to be changed together with
# `algorithm`.
idx = 7
algorithm = "drr"

if algorithm == "drr":
    train_version = dataset_name
    train_id = drr_train_ids[idx]
else:
    train_version = "{}_fair".format(dataset_name)
    train_id = fairrec_train_ids[idx]

# Directory of the selected run; the YAML file named after the train version
# inside it is loaded as the configuration for this evaluation.
output_path = "../model/{}/{}".format(train_version, train_id)
path = os.path.abspath(os.path.join(output_path, "{}.yaml".format(train_version)))
with open(path) as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

no_cuda = False

# Recommendation-list cutoffs; the evaluation is repeated once per value.
top_k = [3, 5, 10, 15]

In [5]:
# User-intent modes for which a sentence encoder is loaded. For any other
# mode `bert` stays None and the sentence_transformers import is skipped.
_bert_emb = [
    "item_title_emb",
    "item_title_genre_emb",
    "item_genre_emb_bert",
    "item_genre_date_emb",
]

needs_bert = config["model_train"]["user_intent"] in _bert_emb

bert = None
if needs_bert:
    # Imported lazily so the dependency is only required when it is used.
    from sentence_transformers import SentenceTransformer

    bert = SentenceTransformer("all-MiniLM-L6-v2")

In [7]:
# Online evaluation, repeated once per cutoff in `top_k`.
# Entry i of each accumulator list below belongs to top_k[i].
_precision = []
_propfair = []
_ufg = []
_recommended_item = []
_random_recommended_item = []
_sum_reward = []

model_cfg = config["model_train"]

for k in top_k:
    sum_precision = 0
    sum_propfair = 0
    sum_reward = 0

    recommended_item = []
    random_recommended_item = []

    # Arguments shared by the cutoff-level environment and by every
    # per-user environment created for this cutoff.
    env_kwargs = dict(
        users_dict=dataset["eval_users_dict"],
        users_history_lens=dataset["eval_users_history_lens"],
        n_groups=model_cfg["n_groups"],
        item_groups=dataset["item_groups"],
        state_size=model_cfg["state_size"],
        done_count=k,
        fairness_constraints=model_cfg["fairness_constraints"],
        reward_threshold=model_cfg["reward_threshold"],
        reward_version=model_cfg["reward_version"],
        use_only_reward_model=True,
        items_metadata=dataset["items_metadata"],
        items_df=dataset["items_df"],
        user_intent_threshold=model_cfg["user_intent_threshold"],
        user_intent=model_cfg["user_intent"],
    )

    # This environment is handed to the agent constructor and supplies the
    # list of users to iterate over.
    env = ENV[algorithm](**env_kwargs)
    env.bert = bert
    available_users = env.available_users

    # A fresh agent is built for every cutoff. learning_starts and batch_size
    # are forced to 1 instead of the values stored in the training config.
    recommender = AGENT[algorithm](
        env=env,
        is_test=True,
        train_version="{}_{}".format(train_version, model_cfg["reward_version"]),
        model_path=output_path,
        users_num=model_cfg["users_num"],
        items_num=model_cfg["items_num"],
        embedding_dim=model_cfg["embedding_dim"],
        srm_size=model_cfg["srm_size"],
        state_size=model_cfg["state_size"],
        actor_hidden_dim=model_cfg["actor_hidden_dim"],
        actor_learning_rate=model_cfg["actor_learning_rate"],
        critic_hidden_dim=model_cfg["critic_hidden_dim"],
        critic_learning_rate=model_cfg["critic_learning_rate"],
        discount_factor=model_cfg["discount_factor"],
        tau=model_cfg["tau"],
        learning_starts=1,  # config["model_train"]["learning_starts"],
        replay_memory_size=model_cfg["replay_memory_size"],
        batch_size=1,  # config["model_train"]["batch_size"],
        embedding_network_weights_path="../{}".format(model_cfg["embedding_network_weights"]),
        n_groups=model_cfg["n_groups"],
        fairness_constraints=model_cfg["fairness_constraints"],
        use_reward_model=model_cfg["use_reward_model"],
    )

    for user_id in tqdm(available_users):
        # Per-user environment: same settings, pinned to `user_id` and wired
        # to the agent's reward model and device.
        eval_env = ENV[algorithm](
            **env_kwargs,
            fix_user_id=user_id,
            reward_model=recommender.reward_model,
            device=recommender.device,
        )
        eval_env.bert = bert

        # recommender.buffer = pickle.load(open(os.path.join(output_path, "buffer.pkl"), "rb"))

        # The second returned value (ndcg) is not used by this notebook.
        (
            precision,
            _ndcg,
            propfair,
            reward,
            list_recommended_item,
            _,
            _,
        ) = recommender.online_evaluate(top_k=False, load_model=True, env=eval_env)

        recommended_item.append(list_recommended_item)
        # Random baseline: k items drawn without replacement from the catalog.
        random_recommended_item.append({user_id: sample(catalog, k)})

        sum_precision += precision
        sum_propfair += propfair
        sum_reward += reward

        del eval_env

    # NOTE(review): the averages divide by the size of eval_users_dict while
    # the loop above runs over env.available_users -- confirm both have the
    # same length, otherwise the means are biased.
    n_eval_users = len(dataset["eval_users_dict"])
    mean_precision = sum_precision / n_eval_users
    mean_propfair = sum_propfair / n_eval_users

    _precision.append(mean_precision)
    _propfair.append(mean_propfair)
    # ufg = mean propfair / (1 - mean precision)
    _ufg.append(mean_propfair / (1 - mean_precision))
    _recommended_item.append(recommended_item)
    _random_recommended_item.append(random_recommended_item)
    _sum_reward.append(sum_reward / n_eval_users)


clear_output(wait=True)
torch.cuda.empty_cache()

----- Reward Model:  True


100%|██████████| 2181/2181 [02:52<00:00, 12.61it/s]


----- Reward Model:  True


100%|██████████| 2181/2181 [03:50<00:00,  9.45it/s]


----- Reward Model:  True


100%|██████████| 2181/2181 [06:49<00:00,  5.32it/s]


----- Reward Model:  True


100%|██████████| 2181/2181 [09:50<00:00,  3.70it/s]


# RecMetrics

In [8]:
# Item feature matrix built from the embeddings the recommender returns for
# the catalog item ids; used below as the feature table for intra-list
# similarity.
feature_df = pd.DataFrame(
    item_groups_df[["item_id"]]
    .apply(lambda ids: recommender.get_items_emb(ids).cpu().numpy().tolist())["item_id"]
    .tolist()
)

# One summary dict per cutoff, keyed by the cutoff value itself.
metrics = {}
for position, cutoff in enumerate(top_k):
    # Each stored entry is a single-item {user_id: recommended list} dict;
    # keep only the lists.
    recs = pd.DataFrame(
        [entry.values() for entry in _recommended_item[position]],
        columns=["sorted_actions"],
    ).sorted_actions.values.tolist()

    metrics[cutoff] = {
        "precision": round(_precision[position] * 100, 4),
        "propfair": round(_propfair[position] * 100, 4),
        "ufg": round(_ufg[position], 4),
        "coverage": round(rm.prediction_coverage(recs, catalog), 4),
        "personalization": round(rm.personalization(recs) * 100, 4),
        "intra_list_similarity": round(rm.intra_list_similarity(recs, feature_df), 4),
    }

# with open(os.path.join(output_path, "metrics.json"), "w") as f:
#     json.dump(metrics, f)

In [9]:
metrics

{3: {'precision': 8.7505,
  'propfair': 7.6221,
  'ufg': 0.0835,
  'coverage': 0.2,
  'personalization': 4.958,
  'intra_list_similarity': 0.5264},
 5: {'precision': 8.7713,
  'propfair': 7.7919,
  'ufg': 0.0854,
  'coverage': 0.27,
  'personalization': 4.9913,
  'intra_list_similarity': 0.5595},
 10: {'precision': 8.7871,
  'propfair': 8.003,
  'ufg': 0.0877,
  'coverage': 0.44,
  'personalization': 3.7095,
  'intra_list_similarity': 0.6127},
 15: {'precision': 8.774,
  'propfair': 7.937,
  'ufg': 0.087,
  'coverage': 0.65,
  'personalization': 2.9269,
  'intra_list_similarity': 0.6349}}

# Exposure

In [None]:
# Exposure plots, one pair of images per cutoff.
#
# Each cutoff must use the recommendations generated for that cutoff.
# `_recommended_item` holds one list per entry of `top_k`, in the same order.
# The previous version read `recommended_item`, the variable left over from
# the last iteration of the evaluation loop, so every cutoff was plotted from
# the recommendations of the largest k.
user_column = "user_id"
item_column = "item_id"
reclist_column = "sorted_actions"
fair_column = "group"

for k, recs_at_k in zip(top_k, _recommended_item):
    # Each entry is a single-item {user_id: recommended list} dict.
    _df = pd.DataFrame([i.values() for i in recs_at_k], columns=[reclist_column])
    _df[user_column] = [next(iter(i)) for i in recs_at_k]
    _item_metadata = pd.DataFrame(dataset["item_groups"].items(), columns=[item_column, fair_column])

    recsys_fair = RecsysFair(
        df=_df,
        supp_metadata=_item_metadata,
        user_column=user_column,
        item_column=item_column,
        reclist_column=reclist_column,
    )

    ex = recsys_fair.exposure(fair_column, k)

    # Images are written next to the trained model, tagged with the cutoff.
    fig = ex.show(kind='per_group_norm', column=fair_column)
    fig.write_image(os.path.join(output_path, "exposure_per_group_k{}_v2.png".format(k)))

    fig = ex.show(kind='per_rank_pos', column=fair_column)
    fig.write_image(os.path.join(output_path, "exposure_per_rank_k{}_v2.png".format(k)))