# Online Evaluation

We pretrain a PMF model as the environment simulator, i.e., to predict the feedback for items that the user has never rated. The online evaluation procedure follows the Training Algorithm, i.e., the parameters are continuously updated during the online evaluation stage. The major difference is that the feedback for a recommended item is obtained from the environment simulator. 

In [None]:
import sys 
sys.path.append('..')

#Dependencies
import os
import json 
import yaml
import pickle
from random import sample
from tqdm import tqdm

import pandas as pd
import numpy as np

from src.environment import OfflineEnv, OfflineFairEnv
from src.model.recommender import DRRAgent, FairRecAgent

from src.recsys_fair_metrics.recsys_fair import RecsysFair

from IPython.display import clear_output
import plotly.offline as py
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import recmetrics as rm

# Initialise plotly's notebook mode before any figure is displayed.
py.init_notebook_mode(connected=True)


# Lookup tables keyed by algorithm name: the environment class and the agent
# class that are instantiated for that algorithm further down.
ENV = {"drr": OfflineEnv, "fairrec": OfflineFairEnv}
AGENT = {"drr": DRRAgent, "fairrec": FairRecAgent}

In [None]:
# The dataset name selects the JSON index file that lists where each
# preprocessed artefact is stored.
dataset_name = "movie_lens_1m"
dataset_path = "../data/{}_output_path.json".format(dataset_name)
with open(dataset_path) as json_file:
    # Maps artefact name -> path of its pickle file (resolved against ".." below).
    _dataset_path = json.load(json_file)


# Load the pickled artefacts used by the evaluation, keyed by artefact name.
# NOTE(review): pickle.load can execute arbitrary code; only point this at
# files produced by this project.
dataset = {}
for _artefact in (
    "eval_users_dict",
    "eval_users_history_lens",
    "users_history_lens",
    "item_groups",
):
    _artefact_path = os.path.join("..", _dataset_path[_artefact])
    with open(_artefact_path, "rb") as pkl_file:
        dataset[_artefact] = pickle.load(pkl_file)

# Tabular view of the item -> group mapping, plus the list of distinct item ids.
item_groups_df = pd.DataFrame(
    dataset["item_groups"].items(),
    columns=["item_id", "group"],
)
catalog = item_groups_df["item_id"].unique().tolist()

# Actor-Critic Models

In [None]:
# Identifiers (output directory names) of saved DRR training runs.
drr_train_ids = {
    0: "movie_lens_1m_2022-04-05_14-47-30",  # OU noise without reset
    1: "movie_lens_1m_2022-04-05_16-33-30",  # with reset
    2: "movie_lens_1m_2022-04-06_11-29-07",  # DRR paper
}

# Identifiers (output directory names) of saved FairRec training runs.
fairrec_train_ids = {
    # Paper
    0: "movie_lens_1m_fair_2022-04-05_14-51-24",
    1: "movie_lens_1m_fair_2022-04-05_21-33-12",
    # Adaptive
    2: "movie_lens_1m_fair_2022-04-06_09-10-12",
    # Combining
    3: "movie_lens_1m_fair_2022-04-05_23-46-04",
}

# Which run to evaluate: `idx` indexes the train-id table of `algorithm`.
idx = 2
algorithm = "drr"

# Any algorithm other than "drr" is treated as the fair variant.
if algorithm == "drr":
    train_version = dataset_name
    train_id = drr_train_ids[idx]
else:
    train_version = "{}_fair".format(dataset_name)
    train_id = fairrec_train_ids[idx]
output_path = "../model/{}/{}".format(train_version, train_id)

# The training configuration is read from a YAML file inside the run directory.
config_filename = "{}.yaml".format(train_version)
path = os.path.abspath(os.path.join(output_path, config_filename))
with open(path) as f:
    # NOTE(review): FullLoader is only appropriate for trusted files; consider
    # yaml.safe_load if the config uses plain YAML types only.
    config = yaml.load(f, Loader=yaml.FullLoader)


no_cuda = False
# Values of k the evaluation below is run for.
top_k = [3, 5, 10, 15]

In [None]:
# Per-cutoff results: entry i of each list corresponds to top_k[i].
_precision = []
_propfair = []
_ufg = []
_recommended_item = []
_random_recommended_item = []

# Training-time hyper-parameters that are forwarded unchanged.
_train_cfg = config["model_train"]
_env_cfg_keys = (
    "n_groups",
    "state_size",
    "fairness_constraints",
    "reward_threshold",
    "reward_version",
)
_agent_cfg_keys = (
    "users_num",
    "items_num",
    "embedding_dim",
    "srm_size",
    "state_size",
    "actor_hidden_dim",
    "actor_learning_rate",
    "critic_hidden_dim",
    "critic_learning_rate",
    "discount_factor",
    "tau",
    "replay_memory_size",
    "batch_size",
    "n_groups",
    "fairness_constraints",
    "use_reward_model",
)

for k in top_k:
    # Running totals over the users evaluated at this cutoff.
    sum_precision = 0
    sum_propfair = 0
    sum_reward = 0

    recommended_item = []
    random_recommended_item = []

    # Arguments shared by the bootstrap environment and every per-user
    # environment. done_count is set to k instead of the configured value.
    env_kwargs = dict(
        users_dict=dataset["eval_users_dict"],
        users_history_lens=dataset["eval_users_history_lens"],
        item_groups=dataset["item_groups"],
        done_count=k,
        use_only_reward_model=True,
        **{name: _train_cfg[name] for name in _env_cfg_keys},
    )

    # Bootstrap environment: used to build the agent and to obtain the list
    # of users to iterate over.
    env = ENV[algorithm](**env_kwargs)
    available_users = env.available_users

    # A fresh agent is created for every cutoff. learning_starts overrides
    # the configured value with (number of available users + 1).
    recommender = AGENT[algorithm](
        env=env,
        is_test=True,
        train_version="{}_{}".format(train_version, _train_cfg["reward_version"]),
        model_path=output_path,
        learning_starts=len(available_users) + 1,
        embedding_network_weights_path="../{}".format(_train_cfg["embedding_network_weights"]),
        **{name: _train_cfg[name] for name in _agent_cfg_keys},
    )

    for user_id in tqdm(available_users):
        # Environment pinned to a single user, sharing the agent's reward
        # model and device.
        eval_env = ENV[algorithm](
            fix_user_id=user_id,
            reward_model=recommender.reward_model,
            device=recommender.device,
            **env_kwargs,
        )

        (
            precision,
            ndcg,
            propfair,
            reward,
            list_recommended_item,
            _,
            _,
        ) = recommender.online_evaluate(top_k=k, load_model=True, env=eval_env)

        recommended_item.append(list_recommended_item)
        # Random baseline: k items drawn from the catalog without replacement.
        random_recommended_item.append({user_id: sample(catalog, k)})

        sum_precision += precision
        sum_propfair += propfair
        sum_reward += reward

        del eval_env

    # NOTE(review): the averages divide by the number of entries in
    # eval_users_dict while the loop above runs over env.available_users;
    # confirm both have the same size, otherwise the means are biased.
    n_eval_users = len(dataset["eval_users_dict"])
    mean_precision = sum_precision / n_eval_users
    mean_propfair = sum_propfair / n_eval_users

    _precision.append(mean_precision)
    _propfair.append(mean_propfair)
    # UFG as computed here: mean propfair divided by (1 - mean precision).
    _ufg.append(mean_propfair / (1 - mean_precision))
    _recommended_item.append(recommended_item)
    _random_recommended_item.append(random_recommended_item)


clear_output(wait=True)

# RecMetrics

In [None]:
# One row of embedding values per item, in the row order of item_groups_df.
# NOTE(review): feature_df keeps a default positional index; recmetrics'
# intra_list_similarity appears to look items up by index label, so confirm
# that item ids coincide with row positions here.
_item_embeddings = item_groups_df[["item_id"]].apply(
    lambda x: recommender.get_items_emb(x).cpu().numpy().tolist()
)
feature_df = pd.DataFrame(_item_embeddings["item_id"].tolist())

# Metrics per cutoff, keyed by the cutoff value. Here `k` is the position in
# top_k (not the cutoff itself) and selects the matching per-cutoff results.
metrics = {}
for k, _cutoff in enumerate(top_k):
    # Each entry of _recommended_item[k] is unpacked into one
    # "sorted_actions" cell; the column is then read back as a list of lists.
    _recs_frame = pd.DataFrame(
        [i.values() for i in _recommended_item[k]],
        columns=["sorted_actions"],
    )
    recs = _recs_frame.sorted_actions.values.tolist()

    metrics[_cutoff] = {
        "precision": round(_precision[k] * 100, 4),
        "propfair": round(_propfair[k] * 100, 4),
        "ufg": round(_ufg[k], 4),
        "coverage": round(rm.prediction_coverage(recs, catalog) * 100, 4),
        "personalization": round(rm.personalization(recs) * 100, 4),
        "intra_list_similarity": round(rm.intra_list_similarity(recs, feature_df), 4),
    }

# Persist the metrics next to the evaluated model.
_metrics_path = os.path.join(output_path, "metrics.json")
with open(_metrics_path, "w") as f:
    json.dump(metrics, f)

# Exposure

In [None]:
# Item metadata and column names do not depend on the cutoff, so they are
# built once instead of on every iteration.
_item_metadata = pd.DataFrame(dataset["item_groups"].items(), columns=["item_id", "group"])

user_column = "user_id"
item_column = "item_id"
reclist_column = "sorted_actions"
fair_column = "group"

# Pair every cutoff with the recommendations collected for that cutoff.
# _recommended_item[i] holds the per-user results gathered while evaluating
# top_k[i]; the bare `recommended_item` variable only holds the lists of the
# last cutoff evaluated, so using it here would plot the same recommendations
# for every k.
for k, _recs_at_k in zip(top_k, _recommended_item):
    # Each entry is treated as a single-entry mapping: its first key is used
    # as the user id and its value as that user's recommendation list.
    _df = pd.DataFrame([i.values() for i in _recs_at_k], columns=["sorted_actions"])
    _df["user_id"] = [list(i.keys())[0] for i in _recs_at_k]

    recsys_fair = RecsysFair(
        df=_df,
        supp_metadata=_item_metadata,
        user_column=user_column,
        item_column=item_column,
        reclist_column=reclist_column,
    )

    ex = recsys_fair.exposure(fair_column, k)

    # Save one figure per view and cutoff next to the evaluated model.
    fig = ex.show(kind='per_group_norm', column=fair_column)
    fig.write_image(os.path.join(output_path, "exposure_per_group_k{}.png".format(k)))

    fig = ex.show(kind='per_rank_pos', column=fair_column)
    fig.write_image(os.path.join(output_path, "exposure_per_rank_k{}.png".format(k)))