In [1]:
#Dependencies

import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import torch
import itertools
import matplotlib.pyplot as plt
import time
import math
import collections

from src.environment.ml_env import OfflineEnv, OfflineFairEnv
from src.model.recommender import DRRAgent, FairRecAgent

In [2]:
import json 
import pickle

# JSON manifest mapping each artifact name to the pickle file that stores it.
dataset_path = "data/movie_lens_100k_output_path.json"
with open(dataset_path) as json_file:
    _dataset_path = json.load(json_file)

# Artifacts required by the evaluation cells, loaded in this order.
_artifact_names = (
    "eval_users_dict",
    "eval_users_dict_positive_items",
    "eval_users_history_lens",
    "users_history_lens",
    "movies_groups",
)

# `dataset` ends up keyed by artifact name; each value is whatever object the
# corresponding pickle contains. Unpickling runs code embedded in the file, so
# the manifest must only point at trusted artifacts.
dataset = {}
for _artifact_name in _artifact_names:
    with open(_dataset_path[_artifact_name], "rb") as pkl_file:
        dataset[_artifact_name] = pickle.load(pkl_file)

# RL: evaluate the trained actor-critic recommender checkpoints

In [3]:
# Lookup tables from algorithm name to the environment / agent class to use.
ENV = {"drr": OfflineEnv, "fairrec": OfflineFairEnv}
AGENT = {"drr": DRRAgent, "fairrec": FairRecAgent}

In [4]:
# --- Which trained run to evaluate -----------------------------------------
algorithm = "fairrec"  # key into the ENV / AGENT lookup tables
train_version = "movie_lens_100k_fair"
train_id = "movie_lens_100k_fair_2021-10-24_01-41-02"
# Directory that holds the actor_*/critic_* checkpoint files of that run.
output_path = "/".join(("model", train_version, train_id))

# --- Dataset dimensions -----------------------------------------------------
users_num, items_num = 943, 1682

# --- State representation ---------------------------------------------------
state_size, srm_size = 5, 2

# --- Agent hyper-parameters (forwarded unchanged to the agent constructor) --
embedding_dim = 50
actor_hidden_dim = critic_hidden_dim = 512
actor_learning_rate = 1e-4
critic_learning_rate = 1e-3
discount_factor = 0.9
tau = 0.01
replay_memory_size = 10 ** 6
batch_size = 64
emb_model = "user_movie"
embedding_network_weights = "model/pmf/emb_50_ratio_0.800000_bs_1000_e_258_wd_0.100000_lr_0.000100_trained_pmf.pt"

# --- Fairness setup: one constraint weight per item group -------------------
n_groups = 10
fairness_constraints = [1.0 for _ in range(n_groups)]

# --- Length of the recommendation list scored by the metrics ----------------
top_k = 10

In [None]:
def _checkpoint_steps(directory, prefix):
    """List the checkpoint numbers found in `directory` for one network.

    A file is considered when its name starts with `prefix`; the number is the
    text between the first underscore and the following dot, converted with
    `int` (so a name whose second "_"-separated field is not numeric raises
    ValueError). The result is sorted ascending.
    """
    steps = []
    for filename in os.listdir(directory):
        if not filename.startswith(prefix):
            continue
        second_field = filename.split("_")[1]
        steps.append(int(second_field.split(".")[0]))
    steps.sort()
    return steps


actor_checkpoints = _checkpoint_steps(output_path, "actor_")
critic_checkpoints = _checkpoint_steps(output_path, "critic_")

print(actor_checkpoints, critic_checkpoints)

In [None]:
# Positional arguments shared by every environment built in this cell.
env_args = (
    dataset["eval_users_dict"],
    dataset["eval_users_dict_positive_items"],
    dataset["eval_users_history_lens"],
    len(fairness_constraints),
    dataset["movies_groups"],
    state_size,
    fairness_constraints,
)

# NOTE(review): precision and ndcg are divided by the size of eval_users_dict,
# whereas the loop only visits env.available_users and propfair is averaged
# over the visited users. If the two sets differ in size the metrics use
# different denominators -- confirm this is intended before comparing numbers.
n_eval_users = len(dataset["eval_users_dict"])

# Checkpoints are paired by position in the two sorted lists.
for actor_checkpoint, critic_checkpoint in zip(actor_checkpoints, critic_checkpoints):
    sum_precision = 0
    sum_ndcg = 0
    sum_propfair = []  # one propfair value per evaluated user

    # Environment without a fixed user: handed to the agent constructor and
    # used to obtain the list of users to evaluate.
    env = ENV[algorithm](*env_args)
    available_users = env.available_users

    recommender = AGENT[algorithm](
        env=env,
        users_num=users_num,
        items_num=items_num,
        genres_num=0,
        movies_genres_id={},
        srm_size=srm_size,
        state_size=state_size,
        train_version=train_version,
        is_test=True,
        embedding_dim=embedding_dim,
        actor_hidden_dim=actor_hidden_dim,
        actor_learning_rate=actor_learning_rate,
        critic_hidden_dim=critic_hidden_dim,
        critic_learning_rate=critic_learning_rate,
        discount_factor=discount_factor,
        tau=tau,
        replay_memory_size=replay_memory_size,
        batch_size=batch_size,
        model_path=output_path,
        emb_model=emb_model,
        embedding_network_weights_path=embedding_network_weights,
        n_groups=n_groups,
        fairness_constraints=fairness_constraints,
    )

    recommender.load_model(
        os.path.join(output_path, "actor_{}.h5".format(actor_checkpoint)),
        os.path.join(output_path, "critic_{}.h5".format(critic_checkpoint)),
    )

    for user_id in tqdm(available_users):
        # A fresh environment pinned to this user for each evaluation episode.
        eval_env = ENV[algorithm](*env_args, fix_user_id=user_id)
        recommender.env = eval_env

        precision, ndcg, propfair = recommender.evaluate(eval_env, top_k=top_k)
        sum_precision += precision
        sum_ndcg += ndcg
        sum_propfair.append(propfair)

    print("---------- Evaluation")
    # Identify the checkpoint pair so the blocks of metrics can be told apart.
    print("- checkpoint: ", actor_checkpoint, critic_checkpoint)

    if not sum_propfair:
        # No user was evaluated; the statistics below would be nan.
        print("- no users evaluated")
        print()
        continue

    mean_precision = sum_precision / n_eval_users
    mean_ndcg = sum_ndcg / n_eval_users
    mean_propfair = np.mean(sum_propfair)
    std_propfair = np.std(sum_propfair)

    print("- precision@: ", round(mean_precision, 4))
    print("- ndcg@: ", round(mean_ndcg, 4))
    print("- propfair: ", round(mean_propfair, 4), round(std_propfair, 4))
    # ufg = mean propfair / (1 - mean precision), as in the original cell.
    print("- ufg: ", round(mean_propfair / (1 - mean_precision), 4))
    print()

# Bandits (OBP): evaluate the pickled bandit policies

In [5]:
import obp
from src.model.pmf import PMF
from bandit import EpsilonGreedy, LinUCB, WFairLinUCB, FairLinUCB

In [6]:
def calculate_ndcg(rel, irel):
    """Compute the discounted cumulative gain of a ranking and of its ideal.

    `rel` holds the relevance of each ranked item and is binarised first
    (1 when the value is > 0, else 0); `irel` holds the ideal relevances and
    is used as given. Position i (0-based) is discounted by log2(i + 2).
    Only the positions present in both sequences contribute.

    Returns the pair (dcg, idcg); both are 0 when there is nothing to score.
    """
    hits = [int(score > 0) for score in rel]
    paired = list(zip(hits, irel))
    discounts = [np.log2(position + 2) for position in range(len(paired))]

    dcg = sum(hit / discount for (hit, _), discount in zip(paired, discounts))
    idcg = sum(ideal / discount for (_, ideal), discount in zip(paired, discounts))
    return dcg, idcg

In [7]:
def evaluate(env, recommender, fairness_constraints, user_embeddings, item_embeddings, top_k=0, context_free=False):
    """Run one evaluation episode of a bandit policy and score it.

    Args:
        env: environment exposing `reset()` -> (user_id, items_ids, done),
            `step(action, top_k=...)` -> (next_items_ids, reward, done, info)
            and a `group_count` mapping read after the episode.
        recommender: policy exposing `select_action`; called with no argument
            when `context_free` is true, otherwise with the context array.
        fairness_constraints: one weight per entry of `env.group_count`.
        user_embeddings: indexable by user id; unused when `context_free`.
            Assumed to be torch tensors -- `unsqueeze` is called on the rows.
        item_embeddings: indexable by `items_ids`; unused when `context_free`.
        top_k: when non-zero, `reward` is treated as a sequence with one entry
            per recommended item and precision/ndcg are computed per step;
            when zero, `reward` is accumulated as-is.
        context_free: skip building the context vector.

    Returns:
        (mean_precision, mean_ndcg, propfair, env.group_count), where the two
        means are per-step averages (0.0 for an episode with no step) and
        propfair = sum_g constraint_g * log(1 + count_g / total_count), or 0
        when nothing was counted.
    """
    steps = 0
    sum_precision = 0
    sum_ndcg = 0

    user_id, items_ids, done = env.reset()

    while not done:
        if context_free:
            recommended_item = recommender.select_action()
        else:
            # Context = [user, user * mean(items), mean(items)] concatenated
            # along dimension 1.
            user_eb = user_embeddings[user_id].unsqueeze(0)
            items_eb = item_embeddings[items_ids]
            item_ave = torch.mean(items_eb, 0).unsqueeze(0)
            context = torch.cat((user_eb, user_eb * item_ave, item_ave), 1)
            recommended_item = recommender.select_action(context.detach().cpu().numpy())

        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)

        if top_k:
            correct_list = [1 if r > 0 else 0 for r in reward]

            # ndcg against an ideal list of all-relevant items; an empty
            # reward list contributes 0 instead of dividing by zero.
            dcg, idcg = calculate_ndcg(correct_list, [1] * len(correct_list))
            if idcg > 0:
                sum_ndcg += dcg / idcg

            # precision: count actual hits, so that a reward list shorter than
            # top_k does not have its missing slots counted as correct.
            sum_precision += sum(correct_list) / top_k
        else:
            # Accumulate (rather than overwrite) so that the division by
            # `steps` below yields a per-step mean.
            sum_precision += reward

        items_ids = next_items_ids
        steps += 1

    # An episode that is already done at reset has no step to average over.
    mean_precision = sum_precision / steps if steps else 0.0
    mean_ndcg = sum_ndcg / steps if steps else 0.0

    propfair = 0
    group_counts = np.array(list(env.group_count.values()))
    total_exp = np.sum(group_counts)
    if total_exp > 0:
        propfair = np.sum(
            np.array(fairness_constraints) * np.log(1 + group_counts / total_exp)
        )

    return (
        mean_precision,
        mean_ndcg,
        propfair,
        env.group_count,
    )

In [8]:
# Pickled bandit policies that can be evaluated; the cell below picks one by
# index. Every entry needs its own trailing comma: without it Python joins
# adjacent string literals into a single (non-existent) path.
bandits = [
    "model/bandits/egreedy_0.1_2021-10-23_01-41-11.pkl",
    "model/bandits/linear_ucb_0.25_2021-10-24_18-22-05.pkl",
    "model/bandits/wfair_linear_ucb_0.25_2021-10-24_18-22-29.pkl",
    "model/bandits/fair_linear_ucb_0.25_2021-10-23_01-41-36.pkl",
]

In [9]:
# Policy file to evaluate, chosen by position in the `bandits` list.
bandit_path = bandits[1]
# When True, evaluate() calls select_action() without a context vector.
context_free = False

In [10]:
# Running totals over the evaluated users.
sum_precision = 0
sum_ndcg = 0
sum_propfair = 0
# Kept as a Counter from the start and merged with update(), so groups whose
# total stays at zero remain present (Counter "+" would drop them).
sum_group_count = collections.Counter({group: 0 for group in range(1, n_groups + 1)})

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Positional arguments shared by every environment built in this cell.
env_args = (
    dataset["eval_users_dict"],
    dataset["eval_users_dict_positive_items"],
    dataset["eval_users_history_lens"],
    len(fairness_constraints),
    dataset["movies_groups"],
    state_size,
    fairness_constraints,
)

# Environment without a fixed user, built only to obtain the user list.
env = OfflineEnv(*env_args, device=device)
available_users = env.available_users

# NOTE: unpickling executes code stored in the file -- only load policy files
# that come from a trusted source.
with open(bandit_path, "rb") as pkl_file:
    recommender = pickle.load(pkl_file)
recommender.len_list = top_k

# The PMF weights provide the embeddings used to build the context, and the
# model itself is handed to each per-user environment as `reward_model`.
reward_model = PMF(users_num, items_num, embedding_dim).to(device)
reward_model.load_state_dict(
    torch.load(embedding_network_weights, map_location=device)
)
user_embeddings = reward_model.user_embeddings.weight.data
item_embeddings = reward_model.item_embeddings.weight.data

for user_id in tqdm(available_users):
    # A fresh environment pinned to this user for each evaluation episode.
    eval_env = OfflineEnv(
        *env_args,
        fix_user_id=user_id,
        reward_model=reward_model,
        device=device,
    )
    precision, ndcg, propfair, group_count = evaluate(
        eval_env,
        recommender,
        fairness_constraints,
        user_embeddings,
        item_embeddings,
        top_k=top_k,
        context_free=context_free,
    )
    sum_precision += precision
    sum_ndcg += ndcg
    sum_propfair += propfair
    sum_group_count.update(group_count)

# NOTE(review): the means divide by the size of eval_users_dict although the
# loop visits env.available_users; if the two differ in size, skipped users
# are implicitly counted as zeros -- confirm this is intended.
n_eval_users = len(dataset["eval_users_dict"])
mean_precision = sum_precision / n_eval_users
mean_ndcg = sum_ndcg / n_eval_users
mean_propfair = sum_propfair / n_eval_users

print("---------- Evaluation")
print("- precision@: ", round(mean_precision, 4))
print("- ndcg@: ", round(mean_ndcg, 4))
print("- propfair: ", round(mean_propfair, 4))
# ufg = mean propfair / (1 - mean precision), as in the original cell.
print("- ufg: ", round(mean_propfair / (1 - mean_precision), 4))
print()

100%|██████████| 185/185 [48:14<00:00, 15.64s/it]

---------- Evaluation
- precision@:  0.1301
- ndcg@:  0.1452
- propfair:  0.8927
- ufg:  1.0261




