In [None]:
import sys 
sys.path.append('..')

#Dependencies
import os
import json 
import pickle
from tqdm import tqdm

import torch
import pandas as pd
import numpy as np

from src.environment.ml_env import OfflineEnv, OfflineFairEnv
from src.model.recommender import DRRAgent, FairRecAgent
from src.model.pmf import PMF

from obp.policy.policy_type import PolicyType

from src.recsys_fair_metrics.recsys_fair import RecsysFair


# Registries keyed by algorithm name: which environment class and which
# agent class to instantiate for "drr" and for "fairrec".
ENV = {"drr": OfflineEnv, "fairrec": OfflineFairEnv}
AGENT = {"drr": DRRAgent, "fairrec": FairRecAgent}

In [None]:
# The JSON index maps artifact names to the pickle files that hold them;
# every path read from it is joined onto ".." before being opened.
dataset_path = "../data/movie_lens_100k_output_path.json"
with open(dataset_path) as json_file:
    _dataset_path = json.load(json_file)

# Load each pickled artifact into `dataset` under the same key it has in the
# JSON index.
# NOTE(review): pickle.load can execute arbitrary code -- only point this at
# files produced by this project.
dataset = {}
for _artifact in (
    "eval_users_dict",
    "eval_users_history_lens",
    "users_history_lens",
    "movies_groups",
):
    with open(os.path.join("..", _dataset_path[_artifact]), "rb") as pkl_file:
        dataset[_artifact] = pickle.load(pkl_file)

# Actor-Critic Models

In [None]:
# Identifiers of the trained DRR runs; the trailing number on each line is
# the value to assign to `idx` to select that run.
drr_train_ids = [
    "movie_lens_100k_2021-11-17_09-24-24", # 0
    "movie_lens_100k_2021-11-17_09-15-08", # 1
    "movie_lens_100k_2021-11-17_10-11-53", # 2
    "movie_lens_100k_2021-11-17_10-15-28", # 3
    # other representations
    "movie_lens_100k_2021-11-17_11-15-40", # 4
    "movie_lens_100k_2021-11-17_11-21-09", # 5
    "movie_lens_100k_2021-11-17_15-29-21", # 6
    "movie_lens_100k_2021-11-17_15-35-16", # 7
    "movie_lens_100k_2021-11-23_11-26-27", # 8
    "movie_lens_100k_2021-11-23_14-41-17", # 9
]

# Identifiers of the trained FairRec runs, indexed the same way.
fairrec_train_ids = [
    "movie_lens_100k_fair_2021-11-17_09-24-11", # 0
    "movie_lens_100k_fair_2021-11-17_09-15-12", # 1
    "movie_lens_100k_fair_2021-11-17_11-10-04", # 2
    "movie_lens_100k_fair_2021-11-17_11-10-25", # 3
    # norm rewards
    "movie_lens_100k_fair_2021-11-17_15-24-39", # 4
    "movie_lens_100k_fair_2021-11-17_15-33-10", # 5
    "movie_lens_100k_fair_2021-11-17_17-25-13", # 6
    "movie_lens_100k_fair_2021-11-17_17-29-41", # 7
    "movie_lens_100k_fair_2021-11-19_17-34-44", # 8
    "movie_lens_100k_fair_2021-11-23_08-54-30", # 9
    "movie_lens_100k_fair_2021-11-23_11-27-04", # 10
    "movie_lens_100k_fair_2021-11-23_14-41-05", # 11
    "movie_lens_100k_fair_2021-11-24_09-27-14", # 12
]

# Which run to evaluate. `idx` indexes the list that belongs to `algorithm`,
# so it must be a valid position in that list (the DRR list is shorter).
idx = 12
algorithm = "fairrec"

if algorithm == "drr":
    train_version = "movie_lens_100k"
    train_id = drr_train_ids[idx]
else:
    train_version = "movie_lens_100k_fair"
    train_id = fairrec_train_ids[idx]

# Directory that holds the checkpoints of the selected run.
output_path = "../model/{}/{}".format(train_version, train_id)

# Hyper-parameters forwarded to the environment and (via **config) to the
# agent constructor in the evaluation cell.
config = {
    "users_num": 943,
    "items_num": 1682,
    "state_size": 5,
    "srm_size": 3 if algorithm == "drr" else 2,
    "embedding_dim": 50,
    "actor_hidden_dim": 512,
    "actor_learning_rate": 0.0001,
    "critic_hidden_dim": 512,
    "critic_learning_rate": 0.001,
    "discount_factor": 0.5,
    "tau": 0.01,
    "learning_starts": 64,
    "replay_memory_size": 1000000,
    "batch_size": 64,
    "emb_model": "user_movie",
    "embedding_network_weights_path": "../model/pmf/emb_50_ratio_0.800000_bs_1000_e_258_wd_0.100000_lr_0.000100_trained_pmf.pt",
    "n_groups": 10,
    # One constraint value per item group (n_groups entries).
    "fairness_constraints": [1.0] * 10,
}

# Evaluation settings passed to the agent (`top_k`) and to the environments
# (`done_count`).
top_k = None
done_count = 10

In [None]:
def _latest_checkpoint(model_dir, prefix):
    """Return the highest checkpoint number found in ``model_dir`` for ``prefix``.

    Only files whose name starts with ``prefix`` are considered. The number is
    the text between the first underscore and the following dot, e.g. ``1000``
    for ``actor_1000.h5``.

    Args:
        model_dir: Directory to scan.
        prefix: File-name prefix including the underscore, e.g. ``"actor_"``.

    Returns:
        The largest checkpoint number as an ``int``.

    Raises:
        FileNotFoundError: If no file in ``model_dir`` starts with ``prefix``.
    """
    steps = [
        int(name.split("_")[1].split(".")[0])
        for name in os.listdir(model_dir)
        if name.startswith(prefix)
    ]
    if not steps:
        # Fail with a message that names the directory instead of the bare
        # IndexError that indexing an empty list would produce.
        raise FileNotFoundError(
            "no '{}*' checkpoint found in {}".format(prefix, model_dir)
        )
    return max(steps)


# Most recent checkpoint number of each network saved for the selected run.
actor_checkpoint = _latest_checkpoint(output_path, "actor_")
critic_checkpoint = _latest_checkpoint(output_path, "critic_")
srm_checkpoint = _latest_checkpoint(output_path, "srm_")

print(actor_checkpoint, critic_checkpoint, srm_checkpoint)

In [None]:
# Offline evaluation of the trained actor-critic recommender selected by
# `algorithm`.
#
# Each run builds one environment only to read the list of users that can be
# evaluated, restores the agent's weights, and then plays one episode per
# user in a dedicated environment pinned to that user (`fix_user_id`).
# Per-user precision and PropFair are summed and turned into run-level
# averages; UFG is the mean PropFair divided by (1 - mean precision).
_precision = []
_propfair = []
_ufg = []
for i in range(1):
    sum_precision = 0
    sum_propfair = 0
    sum_reward = 0

    # One entry per evaluated user, as returned by `recommender.train`.
    recommended_item = []

    env = ENV[algorithm](
        users_dict=dataset["eval_users_dict"],
        users_history_lens=dataset["eval_users_history_lens"],
        n_groups=config["n_groups"],
        movies_groups=dataset["movies_groups"],
        state_size=config["state_size"],
        done_count=done_count,
        fairness_constraints=config["fairness_constraints"],
        use_only_reward_model=True,
    )
    available_users = env.available_users

    recommender = AGENT[algorithm](
        env=env,
        train_version=train_version,
        is_test=True,
        model_path=output_path,
        **config
    )

    # Load each network from its own latest checkpoint. Previously all three
    # file names were built from `actor_checkpoint`, which ignored the critic
    # and srm checkpoint numbers computed for exactly this purpose.
    recommender.load_model(
        os.path.join(output_path, "actor_{}.h5".format(actor_checkpoint)),
        os.path.join(output_path, "critic_{}.h5".format(critic_checkpoint)),
        os.path.join(output_path, "srm_{}.h5".format(srm_checkpoint)),
    )

    for user_id in tqdm(available_users):

        # Fresh environment restricted to this user; it scores actions with
        # the agent's own reward model on the agent's device.
        eval_env = ENV[algorithm](
            users_dict=dataset["eval_users_dict"],
            users_history_lens=dataset["eval_users_history_lens"],
            n_groups=config["n_groups"],
            movies_groups=dataset["movies_groups"],
            state_size=config["state_size"],
            done_count=done_count,
            fairness_constraints=config["fairness_constraints"],
            fix_user_id=user_id,
            reward_model=recommender.reward_model,
            device=recommender.device,
            use_only_reward_model=True,
        )

        recommender.env = eval_env

        # The agent was built with is_test=True; a single episode is played
        # for this user and its metrics are returned.
        precision, ndcg, propfair, reward, list_recommended_item = recommender.train(
            max_episode_num=1, top_k=top_k
        )
        recommended_item.append(list_recommended_item)

        sum_precision += precision
        sum_propfair += propfair
        sum_reward += reward

        del eval_env

    # NOTE(review): the averages divide by every user in `eval_users_dict`
    # while the loop above only visits `env.available_users` -- confirm the
    # two always have the same size.
    n_eval_users = len(dataset["eval_users_dict"])
    run_precision = sum_precision / n_eval_users
    run_propfair = sum_propfair / n_eval_users

    _precision.append(run_precision)
    _propfair.append(run_propfair)
    _ufg.append(run_propfair / (1 - run_precision))


print("PropFair ", round(np.mean(_propfair), 4))
print("Precision ", round(np.mean(_precision), 4))
print("UFG ", round(np.mean(_ufg), 4))

# Bandit Models

In [None]:
# Pickled bandit policies that can be evaluated; `idx` selects one of them.
train_ids = [
    "egreedy_0.1_2021-10-29_23-50-32.pkl",
    "linear_ucb_0.1_2021-11-04_15-01-07.pkl",
    "wfair_linear_ucb_0.1_2021-11-04_15-01-15.pkl"
]
idx = 0

# From here on `output_path` is the path of a single pickle file, not a run
# directory as in the actor-critic section.
train_version = "bandits"
train_id = train_ids[idx]
output_path = "../model/{}/{}".format(train_version, train_id)

# Dataset / embedding settings used to build the PMF reward model and the
# evaluation environments.
users_num, items_num = 943, 1682
state_size = 5
embedding_dim = 50
emb_model = "user_movie"
embedding_network_weights = "../model/pmf/emb_50_ratio_0.800000_bs_1000_e_258_wd_0.100000_lr_0.000100_trained_pmf.pt"

# Item groups and one fairness constraint value per group.
n_groups = 10
fairness_constraints = [1.0] * 10

# Evaluation settings passed to the environments (`done_count`).
top_k = None
done_count = 10

In [None]:
# Use the GPU when one is visible to torch, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the PMF model and restore its trained weights, mapping the stored
# tensors onto `device` while loading.
reward_model = PMF(users_num, items_num, embedding_dim)
_pmf_state = torch.load(embedding_network_weights, map_location=device)
reward_model.load_state_dict(_pmf_state)

# Raw embedding tables; the bandit evaluation indexes them by user id and by
# item ids to build contexts.
user_embeddings = reward_model.user_embeddings.weight.data
item_embeddings = reward_model.item_embeddings.weight.data

In [None]:
# Offline evaluation of the pickled bandit policy selected in the bandit
# configuration cell.
#
# For every run, the policy is loaded once and then plays one episode per
# available user in an environment pinned to that user. Rewards are
# binarized; per-user precision is the mean binarized reward over the
# episode's steps. Run-level precision, PropFair and UFG are collected in
# `_precision`, `_propfair` and `_ufg`.
_precision = []
_propfair = []
_ufg = []
for i in range(1):
    sum_precision = 0
    sum_propfair = 0
    sum_reward = 0

    # This environment is only used to read the list of users to evaluate.
    env = OfflineEnv(
        users_dict=dataset["eval_users_dict"],
        users_history_lens=dataset["eval_users_history_lens"],
        n_groups=n_groups,
        movies_groups=dataset["movies_groups"],
        state_size=state_size,
        done_count=done_count,
        fairness_constraints=fairness_constraints,
        use_only_reward_model=True,
    )
    available_users = env.available_users


    # NOTE(review): pickle.load can execute arbitrary code -- only load
    # policy files produced by this project.
    with open(output_path, "rb") as pkl_file:
        bandit = pickle.load(pkl_file)

    # One {user_id: [recommended item, ...]} dict per evaluated user.
    recommended_item = []
    for user_id in tqdm(available_users):
        recommended_item.append({user_id: []})

        # NOTE(review): the policy is loaded once per run (above), so the
        # `update_params` calls below carry over from one user to the next.
        # Reloading the pickle here would reset it per user -- confirm which
        # behavior is intended.

        # Fresh environment restricted to this user, scoring actions with the
        # PMF reward model.
        eval_env = OfflineEnv(
            users_dict=dataset["eval_users_dict"],
            users_history_lens=dataset["eval_users_history_lens"],
            n_groups=n_groups,
            movies_groups=dataset["movies_groups"],
            state_size=state_size,
            done_count=done_count,
            fairness_constraints=fairness_constraints,
            fix_user_id=user_id,
            reward_model=reward_model,
            use_only_reward_model=True,
        )

        steps = 0
        mean_precision = 0  # running sum of binarized rewards for this user

        # Start the episode. `user_id` is rebound to the value returned by
        # the environment and is used below as the key into the dict appended
        # above -- presumably identical to `fix_user_id`; verify.
        user_id, items_ids, done = eval_env.reset()
        bandit.clear_group_count()

        while not done:
            steps += 1

            # Ask the policy for a list of actions.
            if bandit.policy_type == PolicyType.CONTEXT_FREE:
                selected_actions = bandit.select_action()
            elif bandit.policy_type == PolicyType.CONTEXTUAL:
                # Context = [user embedding, user embedding * mean item
                # embedding, mean item embedding], flattened to one row.
                # The hard-coded 150 is 3 * embedding_dim for the
                # embedding_dim = 50 set in the bandit configuration.
                user_eb = user_embeddings[user_id]
                items_eb = item_embeddings[items_ids]
                item_ave = torch.mean(items_eb, 0)
                context = torch.cat((user_eb, user_eb * item_ave, item_ave), 0).cpu().numpy()
                context = context.reshape(1, 150)
                selected_actions = bandit.select_action(context)

            # Only the first selected action is recorded and played.
            recommended_item[-1][user_id].append(selected_actions[0])

            # Advance the environment with that action.
            next_items_ids, rewards, done, _ = eval_env.step(
                selected_actions[0]
            )


            # Binarize the reward: 1 when rewards * 2 + 3 >= 4, else 0.
            # NOTE(review): presumably this maps a normalized reward back to
            # a 1-5 rating and treats ratings >= 4 as hits -- confirm.
            rewards = 1 if (rewards*2)+3 >= 4 else 0

            # `[rewards]` has a single element, so zip pairs only the first
            # selected action with the reward; the policy is updated once
            # per step.
            for action, reward in zip(selected_actions, [rewards]):
                if bandit.policy_type == PolicyType.CONTEXT_FREE:
                    bandit.update_params(action=action, reward=reward)
                elif bandit.policy_type == PolicyType.CONTEXTUAL:
                    bandit.update_params(
                        action=action,
                        reward=reward,
                        context=context,
                    )

            mean_precision += rewards
            sum_reward += rewards


            # At the end of the episode, add this user's PropFair (read from
            # the policy) and precision (hits / steps) to the run totals.
            if done:
                sum_propfair += bandit.propfair
                sum_precision += mean_precision / steps

        del eval_env

    # NOTE(review): the averages divide by every user in `eval_users_dict`
    # while the loop only visits `env.available_users` -- confirm the two
    # always have the same size.
    # UFG = mean PropFair / (1 - mean precision).
    _precision.append(sum_precision / len(dataset["eval_users_dict"]))
    _propfair.append(sum_propfair / len(dataset["eval_users_dict"]))
    _ufg.append((sum_propfair / len(dataset["eval_users_dict"]))
        / (1 - (sum_precision / len(dataset["eval_users_dict"]))))

print("PropFair ",  round(np.mean(_propfair), 4))
print("Precision ", round(np.mean(_precision), 4))
print("UFG ", round(np.mean(_ufg), 4))


# Exposure

In [None]:
# Column names shared by the recommendation frame, the item metadata and the
# fairness report.
user_column = "user_id"
item_column = "movie_id"
reclist_column = "sorted_actions"
fair_column = "group"

# One row per evaluated user. Each entry of `recommended_item` is read as a
# single-key dict: the key becomes the user id and the value the list stored
# in the recommendation-list column.
_df = pd.DataFrame(
    [list(rec.values()) for rec in recommended_item],
    columns=[reclist_column],
)
_df[user_column] = [list(rec)[0] for rec in recommended_item]

# Item metadata: one row per movie with the group it belongs to.
_item_metadata = pd.DataFrame(
    list(dataset["movies_groups"].items()),
    columns=[item_column, fair_column],
)

recsys_fair = RecsysFair(
    df=_df,
    supp_metadata=_item_metadata,
    user_column=user_column,
    item_column=item_column,
    reclist_column=reclist_column,
)

# Exposure report over the `group` column; the second argument is passed
# through unchanged (10).
ex = recsys_fair.exposure(fair_column, 10)

In [None]:
# Plot the normalized exposure per item group and save it with the model.
fig = ex.show(kind='per_group_norm', column=fair_column)
fig.show()

# `output_path` is a run directory for the actor-critic models but a single
# .pkl file for the bandit models. Joining a file name onto a file path gives
# a location that cannot be written, so in that case save the figure next to
# the pickle, prefixed with the pickle's base name.
if os.path.isdir(output_path):
    _figure_path = os.path.join(output_path, "exposure_per_group.png")
else:
    _figure_path = "{}_exposure_per_group.png".format(
        os.path.splitext(output_path)[0]
    )
fig.write_image(_figure_path)

In [None]:
# Plot the exposure per rank position and save it with the model.
fig = ex.show(kind='per_rank_pos', column=fair_column)
fig.show()

# `output_path` is a run directory for the actor-critic models but a single
# .pkl file for the bandit models. Joining a file name onto a file path gives
# a location that cannot be written, so in that case save the figure next to
# the pickle, prefixed with the pickle's base name.
if os.path.isdir(output_path):
    _figure_path = os.path.join(output_path, "exposure_per_rank.png")
else:
    _figure_path = "{}_exposure_per_rank.png".format(
        os.path.splitext(output_path)[0]
    )
fig.write_image(_figure_path)