In [1]:
import json
import os
import pickle
import sys

# Make the repository root importable before the project-local `src` imports.
sys.path.append('..')

import pandas as pd
import torch
from obp.policy.policy_type import PolicyType
from tqdm import tqdm

from src.data.obp_dataset import MovieLensDataset
from src.model.pmf import PMF
from src.recsys_fair_metrics.recsys_fair import RecsysFair
from src.recsys_fair_metrics.util.util import parallel_literal_eval


In [8]:
# JSON manifest that maps artifact names to pickle paths; the paths are
# joined onto the parent directory ("..") below.
dataset_path = "../data/movie_lens_100k_output_path.json"
with open(dataset_path) as json_file:
    _dataset_path = json.load(json_file)

# Manifest entries loaded into `dataset`, stored under the same key.
_DATASET_KEYS = (
    "eval_users_dict",
    "eval_users_dict_positive_items",
    "eval_users_history_lens",
    "users_history_lens",
    "movies_groups",
)

dataset = {}
for _dataset_key in _DATASET_KEYS:
    # NOTE(security): pickle.load can execute arbitrary code; only point the
    # manifest at artifacts produced by a trusted pipeline.
    with open(os.path.join("..", _dataset_path[_dataset_key]), "rb") as pkl_file:
        dataset[_dataset_key] = pickle.load(pkl_file)

# Build the logged dataset, restricted to the ids that are keys of
# dataset["eval_users_dict"].
obp_dataset = MovieLensDataset(
    data_path=os.path.join(os.getcwd(), "../data/"),
    embedding_network_weights_path="../model/pmf/emb_50_ratio_0.800000_bs_1000_e_258_wd_0.100000_lr_0.000100_trained_pmf.pt",
    embedding_dim=50,
    users_num=943,
    items_num=1682,
    state_size=5,
    filter_ids=list(dataset["eval_users_dict"].keys())
)


In a future version of pandas all arguments of read_csv except for the argument 'filepath_or_buffer' will be keyword-only



----- Finished data load
----- Preprocessing dataset
Finished preprocessing


In [9]:
# Pickled bandit checkpoints, looked up under ../model/<train_version>/.
train_ids = [
    "egreedy_0.1_2021-10-29_23-50-32.pkl",
    "linear_ucb_0.1_2021-11-04_15-01-07.pkl",
    "wfair_linear_ucb_0.1_2021-11-04_15-01-15.pkl",
]

algorithm = "drr"
train_version = "bandits"
train_id = train_ids[2]  # the wfair_linear_ucb checkpoint
output_path = "../model/{}/{}".format(train_version, train_id)

# Sizes handed to the PMF constructor below.
users_num, items_num = 943, 1682
state_size = 5
embedding_dim = 50
emb_model = "user_movie"
embedding_network_weights = "../model/pmf/emb_50_ratio_0.800000_bs_1000_e_258_wd_0.100000_lr_0.000100_trained_pmf.pt"
n_groups = 10
fairness_constraints = [1.0] * n_groups  # one constraint of 1.0 per group

top_k = None
done_count = 10

# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Restore the PMF weights and expose its two embedding tables.
reward_model = PMF(users_num, items_num, embedding_dim).to(device)
_pmf_state_dict = torch.load(embedding_network_weights, map_location=device)
reward_model.load_state_dict(_pmf_state_dict)
user_embeddings = reward_model.user_embeddings.weight.data
item_embeddings = reward_model.item_embeddings.weight.data

In [10]:
# Width of the context row handed to contextual policies. The loop below
# builds the context by concatenating three vectors derived from the PMF
# embeddings, so this presumably has to equal 3 * embedding_dim -- confirm
# if the embedding size ever changes.
dim_context = 150

# NOTE(security): pickle.load can execute arbitrary code; only open
# checkpoints produced by a trusted training run.
with open(output_path, "rb") as pkl_file:
    bandit = pickle.load(pkl_file)

# Presumably the number of actions returned per select_action call -- verify
# against the policy class.
bandit.len_list = 10

# Resolve the policy kind once and fail fast on anything else. Previously an
# unsupported type left `selected_actions` unbound (NameError) or silently
# reused the value from an earlier iteration / earlier cell run.
if bandit.policy_type == PolicyType.CONTEXT_FREE:
    _uses_context = False
elif bandit.policy_type == PolicyType.CONTEXTUAL:
    _uses_context = True
else:
    raise ValueError(
        "Unsupported bandit policy type {!r}; expected PolicyType.CONTEXT_FREE "
        "or PolicyType.CONTEXTUAL".format(bandit.policy_type)
    )

selected_actions_list = list()
estimated_rewards = list()  # never filled in this cell; kept so the name still exists afterwards
for index, row in tqdm(obp_dataset.data.iterrows(), total=obp_dataset.data.shape[0]):

    # Logged interaction: the movie shown and a binarised reward (rating >= 4 -> 1).
    action_ = row["movie_id"]
    reward_ = 0 if row["rating"] < 4 else 1

    # Context = [user, user * mean(history items), mean(history items)].
    user_eb = user_embeddings[row["user_id"]]
    items_eb = item_embeddings[row["item_id_history"]]
    item_ave = torch.mean(items_eb, 0)
    context_ = torch.cat((user_eb, user_eb * item_ave, item_ave), 0).cpu().numpy()

    # select a list of actions
    if _uses_context:
        context_row_ = context_.reshape(1, dim_context)
        selected_actions = bandit.select_action(context_row_)
    else:
        selected_actions = bandit.select_action()

    # Update the policy only when its top-ranked action equals the logged
    # action; only the first position of the selection is compared.
    action_match_ = action_ == selected_actions[0]
    if action_match_:
        if _uses_context:
            bandit.update_params(
                action=action_,
                reward=reward_,
                context=context_row_,
            )
        else:
            bandit.update_params(action=action_, reward=reward_)

    # Keep every selection, matched or not, for the exposure metrics.
    selected_actions_list.append(selected_actions)

100%|██████████| 16983/16983 [26:23<00:00, 10.73it/s]


In [11]:
# Copy of the logged data with the per-row selections from the replay loop
# attached as a new column; the original frame is left untouched.
_df = obp_dataset.data.assign(sorted_actions=selected_actions_list)

In [12]:
# One row per (key, value) pair of dataset["movies_groups"]: the key becomes
# "movie_id" and the value becomes "group".
_movie_group_pairs = dataset["movies_groups"].items()
_item_metadata = pd.DataFrame(_movie_group_pairs, columns=["movie_id", "group"])


In [13]:
# Persist both frames next to the notebook, without the index column.
# CSV stores the "sorted_actions" cells as text; the reload cell parses them
# back with `converter`.
for _frame, _csv_path in ((_df, "./df.csv"), (_item_metadata, "./item.csv")):
    _frame.to_csv(_csv_path, index=False)

In [2]:
import numpy as np
def converter(instr):
    """Parse a bracketed, space-separated number list into a NumPy array.

    Passed to ``pd.read_csv`` as the converter for the ``sorted_actions``
    column.

    Parameters
    ----------
    instr : str
        Cell text such as ``"[1 2 3]"``. The first and last characters are
        dropped unconditionally; they are not checked to be brackets.

    Returns
    -------
    numpy.ndarray
        1-D array of the parsed values, in ``np.fromstring``'s default
        dtype (float).
    """
    inner_text = instr[1:-1]
    return np.fromstring(inner_text, sep=' ')

# Reload the two CSV files; the "sorted_actions" cells are text on disk and
# are turned back into arrays by `converter`.
_column_converters = {'sorted_actions': converter}
_df = pd.read_csv("./df.csv", converters=_column_converters)
_item_metadata = pd.read_csv("./item.csv")

In [3]:
# Column roles in `_df`, plus the metadata column used for grouping.
user_column, item_column, reclist_column = "user_id", "movie_id", "sorted_actions"
fair_column = "group"

recsys_fair = RecsysFair(
    df=_df,
    supp_metadata=_item_metadata,
    user_column=user_column,
    item_column=item_column,
    reclist_column=reclist_column,
)

# Second argument is 10, matching the list length requested from the bandit
# (presumably the number of ranks considered -- confirm against RecsysFair.exposure).
ex = recsys_fair.exposure(fair_column, 10)

100%|██████████| 16983/16983 [00:00<00:00, 4578471.84it/s]


In [4]:
# Render the "per_group_norm" exposure view inline.
fig = ex.show(column=fair_column, kind="per_group_norm")
fig.show()
# To save it instead: fig.write_image("exposure_per_group.png")

In [16]:
# Write the "per_rank_pos" exposure view to a PNG next to the notebook.
fig = ex.show(column=fair_column, kind="per_rank_pos")
fig.write_image("exposure_per_rank.png")

In [17]:
# Pickled bandit checkpoints, looked up under ../model/<train_version>/.
train_ids = [
    "egreedy_0.1_2021-10-29_23-50-32.pkl",
    "linear_ucb_0.1_2021-11-04_15-01-07.pkl",
    "wfair_linear_ucb_0.1_2021-11-04_15-01-15.pkl",
]

algorithm = "drr"
train_version = "bandits"
train_id = train_ids[0]  # the egreedy checkpoint
output_path = "../model/{}/{}".format(train_version, train_id)

# Sizes handed to the PMF constructor below.
users_num, items_num = 943, 1682
state_size = 5
embedding_dim = 50
emb_model = "user_movie"
embedding_network_weights = "../model/pmf/emb_50_ratio_0.800000_bs_1000_e_258_wd_0.100000_lr_0.000100_trained_pmf.pt"
n_groups = 10
fairness_constraints = [1.0] * n_groups  # one constraint of 1.0 per group

top_k = None
done_count = 10

# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Restore the PMF weights and expose its two embedding tables.
reward_model = PMF(users_num, items_num, embedding_dim).to(device)
_pmf_state_dict = torch.load(embedding_network_weights, map_location=device)
reward_model.load_state_dict(_pmf_state_dict)
user_embeddings = reward_model.user_embeddings.weight.data
item_embeddings = reward_model.item_embeddings.weight.data

# Width of the context row handed to contextual policies. The loop below
# builds the context by concatenating three vectors derived from the PMF
# embeddings, so this presumably has to equal 3 * embedding_dim -- confirm
# if the embedding size ever changes.
dim_context = 150

# NOTE(security): pickle.load can execute arbitrary code; only open
# checkpoints produced by a trusted training run.
with open(output_path, "rb") as pkl_file:
    bandit = pickle.load(pkl_file)

# Presumably the number of actions returned per select_action call -- verify
# against the policy class.
bandit.len_list = 10

# Resolve the policy kind once and fail fast on anything else. Previously an
# unsupported type left `selected_actions` unbound (NameError) or silently
# reused the value from an earlier iteration / earlier cell run.
if bandit.policy_type == PolicyType.CONTEXT_FREE:
    _uses_context = False
elif bandit.policy_type == PolicyType.CONTEXTUAL:
    _uses_context = True
else:
    raise ValueError(
        "Unsupported bandit policy type {!r}; expected PolicyType.CONTEXT_FREE "
        "or PolicyType.CONTEXTUAL".format(bandit.policy_type)
    )

selected_actions_list = list()
estimated_rewards = list()  # never filled in this cell; kept so the name still exists afterwards
for index, row in tqdm(obp_dataset.data.iterrows(), total=obp_dataset.data.shape[0]):

    # Logged interaction: the movie shown and a binarised reward (rating >= 4 -> 1).
    action_ = row["movie_id"]
    reward_ = 0 if row["rating"] < 4 else 1

    # Context = [user, user * mean(history items), mean(history items)].
    user_eb = user_embeddings[row["user_id"]]
    items_eb = item_embeddings[row["item_id_history"]]
    item_ave = torch.mean(items_eb, 0)
    context_ = torch.cat((user_eb, user_eb * item_ave, item_ave), 0).cpu().numpy()

    # select a list of actions
    if _uses_context:
        context_row_ = context_.reshape(1, dim_context)
        selected_actions = bandit.select_action(context_row_)
    else:
        selected_actions = bandit.select_action()

    # Update the policy only when its top-ranked action equals the logged
    # action; only the first position of the selection is compared.
    action_match_ = action_ == selected_actions[0]
    if action_match_:
        if _uses_context:
            bandit.update_params(
                action=action_,
                reward=reward_,
                context=context_row_,
            )
        else:
            bandit.update_params(action=action_, reward=reward_)

    # Keep every selection, matched or not, for the exposure metrics.
    selected_actions_list.append(selected_actions)

# Copy of the logged data with the per-row selections attached as a column.
_df = obp_dataset.data.assign(sorted_actions=selected_actions_list)

# One row per (key, value) pair of dataset["movies_groups"].
_movie_group_pairs = dataset["movies_groups"].items()
_item_metadata = pd.DataFrame(_movie_group_pairs, columns=["movie_id", "group"])

# Column roles in `_df`, plus the metadata column used for grouping.
user_column, item_column, reclist_column = "user_id", "movie_id", "sorted_actions"
fair_column = "group"

recsys_fair = RecsysFair(
    df=_df,
    supp_metadata=_item_metadata,
    user_column=user_column,
    item_column=item_column,
    reclist_column=reclist_column,
)

ex = recsys_fair.exposure(fair_column, 10)

# Export both exposure views as PNG files, per-group first, then per-rank.
for _view_kind, _png_name in (
    ("per_group_norm", "exposure_per_group_2.png"),
    ("per_rank_pos", "exposure_per_rank_2.png"),
):
    fig = ex.show(kind=_view_kind, column=fair_column)
    fig.write_image(_png_name)

100%|██████████| 16983/16983 [00:05<00:00, 3014.13it/s]
100%|██████████| 16983/16983 [00:00<00:00, 4610178.29it/s]
