# Offline Evaluation

For a given session $S_j$, the recommender only recommends items that appear in this session, denoted $I(S_j)$, rather than items from the whole item space. The offline evaluation can therefore be treated as a **re-ranking procedure** over this candidate set: items are selected iteratively according to the action generated by the Actor Network of the DRR framework. Moreover, the model parameters are not updated during offline evaluation.

In [2]:
import sys 
sys.path.append('..')

#Dependencies
import os
import json 
import pickle
from tqdm import tqdm

import pandas as pd

import torch
import numpy as np

from src.environment.ml_env import OfflineEnv, OfflineFairEnv
from src.model.recommender import DRRAgent, FairRecAgent
from src.model.pmf import PMF

import obp
from obp.policy.policy_type import PolicyType
from src.model.bandit import EpsilonGreedy, LinUCB, WFairLinUCB


# Registries keyed by algorithm name: the environment class and the agent
# class used for that algorithm.
ENV = {"drr": OfflineEnv, "fairrec": OfflineFairEnv}
AGENT = {"drr": DRRAgent, "fairrec": FairRecAgent}

In [4]:
# The JSON file maps artefact names to pickle paths (relative to the parent
# directory); each listed artefact is unpickled into `dataset` under its name.
dataset_path = "../data/movie_lens_100k_output_path.json"
with open(dataset_path) as json_file:
    _dataset_path = json.load(json_file)

dataset = {}
for _artefact in (
    "eval_users_dict",
    "eval_users_history_lens",
    "users_history_lens",
    "movies_groups",
):
    with open(os.path.join("..", _dataset_path[_artefact]), "rb") as pkl_file:
        dataset[_artefact] = pickle.load(pkl_file)

In [None]:
from sklearn.preprocessing import OneHotEncoder
# One-hot encoder instance created with handle_unknown='ignore'.
# NOTE(review): `enc` is not referenced anywhere in the visible part of this
# notebook (the indicator columns are built with pd.get_dummies) — confirm
# whether it is still needed.
enc = OneHotEncoder(handle_unknown='ignore')

def age_group_bukets(age):
    """Return the index of the age bucket that ``age`` falls into.

    Buckets are delimited by their exclusive upper bounds:
    below 20 -> 0, below 30 -> 1, below 40 -> 2, below 50 -> 3,
    below 60 -> 4, and everything else -> 5.
    """
    upper_bounds = (20, 30, 40, 50, 60)
    for bucket, bound in enumerate(upper_bounds):
        if age < bound:
            return bucket
    # Not below any bound: last (open-ended) bucket.
    return len(upper_bounds)


# Build the user feature table: drop the zip code, replace each age by its
# bucket index, expand gender / occupation / age bucket into indicator
# columns, then remove the raw identifier and categorical columns.
user_df = pd.read_csv("../data/ml-100k/users.csv")
user_df = user_df.drop(columns=["zip_code"])

gender = pd.get_dummies(user_df["gender"])
occupation = pd.get_dummies(user_df["occupation"])

user_df["age"] = user_df["age"].apply(age_group_bukets)
age = pd.get_dummies(user_df["age"])

user_df = pd.concat([user_df, gender, occupation, age], axis=1).drop(
    columns=["user_id", "age", "gender", "occupation"]
)
# Bare expression so the notebook displays the resulting frame.
user_df

# Actor-Critic Models

In [None]:
# Identifiers of the training runs available for evaluation, one list per
# algorithm. They must be filled in before running this cell; `idx` selects
# which entry of the list for the chosen algorithm is evaluated.
drr_train_ids = []
fairrec_train_ids = []

idx = 0

# Algorithm under evaluation: "drr" or anything else for the fair variant.
algorithm = "drr"
train_version = "movie_lens_100k" if algorithm == "drr" else "movie_lens_100k_fair"

# Look up the run id, turning the bare "list index out of range" raised by an
# empty / too-short id list into an actionable message (same exception type).
_train_ids = drr_train_ids if algorithm == "drr" else fairrec_train_ids
try:
    train_id = _train_ids[idx]
except IndexError as err:
    raise IndexError(
        "No training run at index {} for algorithm '{}': add the run id to "
        "the matching *_train_ids list before running this cell.".format(
            idx, algorithm
        )
    ) from err

# Directory holding the checkpoints of the selected training run.
output_path = "../model/{}/{}".format(train_version, train_id)

# Settings forwarded to the agent constructor via **config; several entries
# are also passed to the environment constructor in the evaluation cell.
config = {
    "users_num": 943,
    "items_num": 1682,
    "state_size": 5,
    "srm_size": 3 if algorithm == "drr" else 2,
    "embedding_dim": 50,
    "actor_hidden_dim": 512,
    "actor_learning_rate": 0.0001,
    "critic_hidden_dim": 512,
    "critic_learning_rate": 0.001,
    "discount_factor": 0.9,
    "tau": 0.01,
    "learning_starts": 1000,
    "replay_memory_size": 1000000,
    "batch_size": 64,
    "emb_model": "user_movie",
    "embedding_network_weights_path": "../model/pmf/emb_50_ratio_0.800000_bs_1000_e_258_wd_0.100000_lr_0.000100_trained_pmf.pt",
    "n_groups": 10,
    # One constraint value per group (10 entries, matching n_groups).
    "fairness_constraints": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
}


# Cut-offs K at which the evaluation cell reports its metrics.
top_k = [5, 10]
# Passed to the environment constructors as `done_count`.
done_count = 10

In [None]:
def _latest_checkpoint(directory, prefix):
    """Return the largest checkpoint number among files starting with ``prefix``.

    File names are expected to look like ``<prefix><number>.<ext>`` (for
    example ``actor_1000.h5``): the number is the text between the first
    underscore and the following dot.

    Raises:
        FileNotFoundError: if ``directory`` has no file starting with
            ``prefix``.
        ValueError: if a matching file name does not carry an integer there.
    """
    steps = [
        int(name.split("_")[1].split(".")[0])
        for name in os.listdir(directory)
        if name.startswith(prefix)
    ]
    if not steps:
        raise FileNotFoundError(
            "No '{}*' checkpoint found in {}".format(prefix, directory)
        )
    return max(steps)


# Most recent checkpoint number saved for each model component.
actor_checkpoint = _latest_checkpoint(output_path, "actor_")
critic_checkpoint = _latest_checkpoint(output_path, "critic_")
srm_checkpoint = _latest_checkpoint(output_path, "srm_")

print(actor_checkpoint, critic_checkpoint, srm_checkpoint)

In [None]:
# Number of independent evaluation passes averaged for every cut-off K.
n_runs = 10

# For every cut-off K: run `n_runs` evaluation passes over all available
# users, average the per-user metrics within each pass, then report the mean
# over passes.
for K in top_k:

    _precision = []
    _ndcg = []
    _reward_eps = []
    _reward_step = []
    for run_idx in range(n_runs):

        sum_precision = 0
        sum_ndcg = 0
        sum_reward_eps = 0
        sum_reward_step = 0

        # Environment over the whole evaluation set; it provides the list of
        # users to evaluate and is handed to the agent constructor.
        env = ENV[algorithm](
            users_dict=dataset["eval_users_dict"],
            users_history_lens=dataset["eval_users_history_lens"],
            n_groups=config["n_groups"],
            movies_groups=dataset["movies_groups"],
            state_size=config["state_size"],
            done_count=done_count,
            fairness_constraints=config["fairness_constraints"],
        )
        available_users = env.available_users

        recommender = AGENT[algorithm](
            env=env,
            train_version=train_version,
            is_test=True,
            model_path=output_path,
            **config
        )

        # Load each component from its own latest checkpoint. Previously the
        # critic and SRM paths were built from `actor_checkpoint`, leaving
        # `critic_checkpoint` / `srm_checkpoint` unused.
        recommender.load_model(
            os.path.join(output_path, "actor_{}.h5".format(actor_checkpoint)),
            os.path.join(
                output_path, "critic_{}.h5".format(critic_checkpoint)
            ),
            os.path.join(
                output_path, "srm_{}.h5".format(srm_checkpoint)
            ),
        )

        for user_id in tqdm(available_users):
            # Dedicated environment pinned to this user.
            eval_env = ENV[algorithm](
                users_dict=dataset["eval_users_dict"],
                users_history_lens=dataset["eval_users_history_lens"],
                n_groups=config["n_groups"],
                movies_groups=dataset["movies_groups"],
                state_size=config["state_size"],
                done_count=done_count,
                fairness_constraints=config["fairness_constraints"],
                fix_user_id=user_id
            )

            # Candidate set: only the items recorded for this user.
            available_items = set(eval_env.user_items.keys())

            precision, ndcg, reward_eps, reward_step = recommender.offline_evaluate(
                eval_env, top_k=K, available_items=available_items
            )

            sum_precision += precision
            sum_ndcg += ndcg
            sum_reward_eps += reward_eps
            sum_reward_step += reward_step

            del eval_env

        # NOTE(review): sums run over `available_users` but are divided by the
        # size of `eval_users_dict`; if the two differ the averages are
        # deflated — confirm this denominator is intended.
        n_eval_users = len(dataset["eval_users_dict"])
        _precision.append(sum_precision / n_eval_users)
        _ndcg.append(sum_ndcg / n_eval_users)
        _reward_eps.append(sum_reward_eps / n_eval_users)
        _reward_step.append(sum_reward_step / n_eval_users)

    print("Precision ", K, round(np.mean(_precision), 4))
    print("NDCG ", K, round(np.mean(_ndcg), 4))
    print("Reward episode ", K, round(np.mean(_reward_eps), 4))
    print("Reward steps ", K, round(np.mean(_reward_step), 4))