In [None]:
import sys 
sys.path.append('..')

#Dependencies

import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt

import json 
import pickle

from src.environment.ml_env import OfflineEnv, OfflineFairEnv
from src.model.recommender import DRRAgent, FairRecAgent
from src.model.pmf import PMF

import obp
from obp.policy.policy_type import PolicyType
from src.model.bandit import EpsilonGreedy, LinUCB, WFairLinUCB, FairLinUCB

In [None]:
# The JSON manifest maps artifact names to pickle locations (paths are joined
# onto ".." below, i.e. they are stored relative to the parent directory).
dataset_path = "../data/movie_lens_100k_output_path.json"
with open(dataset_path) as json_file:
    _dataset_path = json.load(json_file)

# Load each pickled artifact listed in the manifest into `dataset`, keyed by
# the same name that is used in the manifest.
dataset = {}
for artifact_name in (
    "eval_users_dict",
    "eval_users_dict_positive_items",
    "eval_users_history_lens",
    "users_history_lens",
    "movies_groups",
):
    artifact_file = os.path.join("..", _dataset_path[artifact_name])
    with open(artifact_file, "rb") as pkl_file:
        dataset[artifact_name] = pickle.load(pkl_file)

In [None]:
# Lookup tables from a short algorithm key to its environment / agent class.
ENV = {"drr": OfflineEnv, "fairrec": OfflineFairEnv}
AGENT = {"drr": DRRAgent, "fairrec": FairRecAgent}

In [None]:
# File names of the pickled bandit policies that can be evaluated; one entry is
# selected by index when the checkpoint path is built.
# NOTE(review): names look like <policy>_<hyperparameter>_<timestamp>.pkl — confirm.
train_ids = [
    "egreedy_0.1_2021-10-29_23-50-32.pkl",
    "linear_ucb_0.1_2021-11-04_15-01-07.pkl",
    "wfair_linear_ucb_0.1_2021-11-04_15-01-15.pkl"
]

In [None]:
# --- Checkpoint to evaluate -------------------------------------------------
train_version = "bandits"
train_id = train_ids[2]
output_path = f"../model/{train_version}/{train_id}"

# --- Sizes used to build the PMF model and the environments -----------------
# NOTE(review): 943 users / 1682 items presumably match the MovieLens 100k
# data referenced by the dataset manifest — confirm.
users_num = 943
items_num = 1682
state_size = 5
embedding_dim = 50

# --- Pre-trained PMF weights and fairness setup -----------------------------
embedding_network_weights = "../model/pmf/emb_50_ratio_0.800000_bs_1000_e_258_wd_0.100000_lr_0.000100_trained_pmf.pt"
n_groups = 10
# One constraint weight per group, all equal to 1.0.
fairness_constraints = [1.0] * n_groups

# --- Evaluation settings ----------------------------------------------------
top_k = [5, 10]  # list sizes K for which precision@K / NDCG@K are reported
done_count = 10

In [None]:
# Prefer the GPU when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the PMF model and restore its pre-trained weights; tensors in the
# checkpoint are remapped onto `device` while loading.
reward_model = PMF(users_num, items_num, embedding_dim)
pmf_state = torch.load(embedding_network_weights, map_location=device)
reward_model.load_state_dict(pmf_state)

# Raw embedding tables, indexed later by user id / item ids to build contexts.
user_embeddings = reward_model.user_embeddings.weight.data
item_embeddings = reward_model.item_embeddings.weight.data

In [None]:
def calculate_ndcg(rel, irel):
    """Compute the DCG of a ranked list and the DCG of its ideal counterpart.

    Args:
        rel: relevance scores in ranked order; each is binarised to 1 when
            strictly positive and to 0 otherwise.
        irel: ideal relevance scores in ranked order, used as given.

    Returns:
        A ``(dcg, idcg)`` tuple. Position ``i`` (0-based) is discounted by
        ``log2(i + 2)``. Only the first ``min(len(rel), len(irel))`` positions
        contribute; both values are the integer 0 when there are none.
    """
    hits = [1 if score > 0 else 0 for score in rel]
    ranked_pairs = list(zip(hits, irel))
    discounts = [np.log2(rank + 2) for rank in range(len(ranked_pairs))]

    dcg = sum(hit / discount for (hit, _), discount in zip(ranked_pairs, discounts))
    idcg = sum(ideal / discount for (_, ideal), discount in zip(ranked_pairs, discounts))
    return dcg, idcg

In [None]:
def _evaluate_user(eval_env, recommender, K):
    """Run one evaluation episode on ``eval_env`` and score the recommendations.

    Reads the module-level ``user_embeddings`` and ``item_embeddings`` tables
    to build the context for contextual policies.

    Args:
        eval_env: environment created with ``fix_user_id`` set to the user
            being evaluated; it is reset and stepped here.
        recommender: unpickled bandit policy exposing ``policy_type`` and
            ``select_action``.
        K: list size; forwarded to ``eval_env.step`` as ``top_k`` and used as
            the precision denominator.

    Returns:
        ``(precision, ndcg)`` averaged over the steps of the episode, or
        ``(0.0, 0.0)`` when the episode finishes before any step is taken.

    Raises:
        ValueError: if the policy type is neither context-free nor contextual.
    """
    # Candidate pool: the items recorded for this user in the environment.
    available_items = set(eval_env.user_items.keys())

    precision_total = 0
    ndcg_total = 0
    steps = 0

    # The episode must run on the per-user environment; otherwise the candidate
    # pool above and the user being stepped would not match.
    user_id, items_ids, done = eval_env.reset()

    while not done:
        # Select a list of actions.
        if recommender.policy_type == PolicyType.CONTEXT_FREE:
            selected_actions = recommender.select_action(list(available_items))
        elif recommender.policy_type == PolicyType.CONTEXTUAL:
            # Context = [user, user * mean(recent items), mean(recent items)].
            user_eb = user_embeddings[user_id]
            items_eb = item_embeddings[items_ids]
            item_ave = torch.mean(items_eb, 0)
            context = torch.cat((user_eb, user_eb * item_ave, item_ave), 0).cpu().numpy()
            # Row vector of whatever width the concatenation produced; a
            # hard-coded width breaks as soon as it differs from that size.
            context = context.reshape(1, -1)
            selected_actions = recommender.select_action(context, list(available_items))
        else:
            raise ValueError(
                "Unsupported policy type: {}".format(recommender.policy_type)
            )

        # Apply the recommendation and observe per-item rewards and next state.
        next_items_ids, reward, done, _ = eval_env.step(selected_actions, top_k=K)

        correct_list = [1 if r > 0 else 0 for r in reward]

        # NDCG against an ideal list in which every returned slot is relevant.
        dcg, idcg = calculate_ndcg(correct_list, [1 for _ in range(len(reward))])
        if idcg > 0:
            ndcg_total += dcg / idcg

        # Precision: count actual hits, so that a reward list shorter than K
        # does not credit the missing slots as correct.
        precision_total += sum(correct_list) / K

        items_ids = next_items_ids
        steps += 1
        # Never recommend the same item twice; keep this a set even when it
        # becomes empty so the next `list(available_items)` stays valid.
        available_items = available_items - set(selected_actions)

    if steps == 0:
        return 0.0, 0.0
    return precision_total / steps, ndcg_total / steps


# Keyword arguments shared by every environment built during evaluation.
_eval_env_kwargs = dict(
    users_dict=dataset["eval_users_dict"],
    users_history_lens=dataset["eval_users_history_lens"],
    n_groups=n_groups,
    movies_groups=dataset["movies_groups"],
    state_size=state_size,
    done_count=done_count,
    fairness_constraints=fairness_constraints,
)

for K in top_k:
    _precision = []
    _ndcg = []
    # Repeat the full evaluation 10 times and report mean / std over the runs.
    for run_idx in range(10):
        sum_precision = 0
        sum_ndcg = 0

        # This environment is only used to enumerate the users to evaluate.
        env = OfflineEnv(**_eval_env_kwargs)
        available_users = env.available_users

        # Reload the policy from disk at the start of every run.
        # SECURITY: pickle.load can execute arbitrary code; only point
        # `output_path` at checkpoints from a trusted source.
        with open(output_path, "rb") as pkl_file:
            recommender = pickle.load(pkl_file)

        recommender.len_list = K

        for user_id in tqdm(available_users):
            eval_env = OfflineEnv(fix_user_id=user_id, **_eval_env_kwargs)
            user_precision, user_ndcg = _evaluate_user(eval_env, recommender, K)
            sum_precision += user_precision
            sum_ndcg += user_ndcg

        # NOTE(review): the average is taken over every user in
        # `eval_users_dict`, while the loop covers `env.available_users`;
        # confirm that the two have the same size.
        _precision.append(sum_precision / len(dataset["eval_users_dict"]))
        _ndcg.append(sum_ndcg / len(dataset["eval_users_dict"]))

    print("Precision ", K, round(np.mean(_precision), 4), np.std(_precision))
    print("NDCG ", K, round(np.mean(_ndcg), 4), np.std(_ndcg))