In [None]:
import sys 
sys.path.append('..')

#Dependencies
import os
import json 
import pickle
import wandb
from tqdm import tqdm

import torch
import numpy as np
import pandas as pd
import recmetrics as rm
from random import sample

import plotly.graph_objects as go
import plotly_express as px


from src.model.pmf import PMF
from src.environment import OfflineEnv
from src.model.recommender.bandit import LinUCB, FairLinUCB
from src.recsys_fair_metrics.recsys_fair import RecsysFair

# Settings

In [None]:
# Registries mapping an algorithm key to its agent class and environment class.
AGENT = dict(linucb=LinUCB, fair_linucb=FairLinUCB)
ENV = dict(linucb=OfflineEnv, fair_linucb=OfflineEnv)

# Key into AGENT / ENV selecting which policy this notebook trains and evaluates.
ALGORITHM = "linucb"

# Environment parameters (forwarded as keyword arguments to the ENV constructor).
N_GROUPS = 10
STATE_SIZE = 5
DONE_COUNT = 10
# One weight per item group. Derived from N_GROUPS so the two cannot drift apart:
# the list is multiplied element-wise with the per-group counts when propfair is
# computed. Replace with an explicit list to use non-uniform weights.
FAIRNESS_CONSTRAINTS = [1.0] * N_GROUPS
REWARD_VERSION = "paper"
REWARD_THRESHOLD = 4
USER_INTENT_THRESHOLD = 0
USER_INTENT = "none"

# Embedding size handed to the PMF reward model, and the number of training episodes.
EMBEDDING_DIM = 100
MAX_EPISODE_NUM = 10000

# Active dataset: JSON manifest, PMF table sizes and the trained PMF checkpoint.
DATASET_PATH = "../data/yahoo_output_path.json"
USERS_NUM = 15400
ITEMS_NUM = 1000
EMBEDDING_NETWORK_WEIGHTS_PATH = "../model/pmf/yahoo_emb_100_ratio_0.800000_bs_100000_e_59_r0.585930_wd_0.100000_lr_0.000100_trained_pmf.pt"

# Alternative dataset (MovieLens 100k); swap with the four values above to use it.
# DATASET_PATH = "../data/movie_lens_100k_output_path.json"
# USERS_NUM = 943
# ITEMS_NUM = 1682
# EMBEDDING_NETWORK_WEIGHTS_PATH =  "../model/pmf/ml_100k_emb_100_ratio_0.800000_bs_1000_e_200_wd_0.100000_lr_0.000100_trained_pmf.pt"

# Provisional output directory; it is re-assigned once the agent (and therefore its
# policy name) has been created.
SAVE_PATH = "../model/{}/".format(ALGORITHM)

# Load Data

In [None]:
# DATASET_PATH is a JSON manifest; each value is a path that is resolved against
# the parent directory ("..") of this notebook.
with open(DATASET_PATH) as manifest_file:
    _dataset_path = json.load(manifest_file)

dataset = {}

# Pickled entries. pickle.load can execute arbitrary code, so only point the
# manifest at files you trust.
for _key in ("train_users_dict", "eval_users_dict"):
    with open(os.path.join("..", _dataset_path[_key]), "rb") as pkl_file:
        dataset[_key] = pickle.load(pkl_file)

# "item_groups" is intentionally not taken from the manifest here; it is loaded
# from a run-specific pickle in the next cell.

# CSV entries, read into DataFrames.
for _key in ("ratings_df", "items_df", "items_metadata"):
    dataset[_key] = pd.read_csv(os.path.join("..", _dataset_path[_key]))

# Only the path is stored for the title embeddings; the file is not opened here.
dataset["title_emb"] = os.path.join("..", _dataset_path["title_emb"])

In [None]:
# Item-group assignments are taken from a specific saved run directory.
item_groups_paths = ["model/yahoo/yahoo_2022-10-04_11-39-07/item_groups.pkl"]

# Only the first listed path is used; it is resolved against the parent directory.
_item_groups_file = os.path.join("..", item_groups_paths[0])
with open(_item_groups_file, "rb") as pkl_file:
    dataset["item_groups"] = pickle.load(pkl_file)

In [None]:
# Instantiate the bandit policy selected by ALGORITHM.
agent = AGENT[ALGORITHM](
    epsilon=0.1,
    # NOTE(review): dim is the literal 5, the same value as STATE_SIZE. If the two
    # are meant to be linked, pass STATE_SIZE instead -- confirm against the agent.
    dim=5,
    n_actions=ITEMS_NUM,
    len_list=1,
)

# Output directory for this policy. exist_ok=True keeps the cell re-runnable:
# without it, a second run fails with FileExistsError once the directory exists.
SAVE_PATH = "../model/{}/{}".format(ALGORITHM, agent.policy_name)
os.makedirs(SAVE_PATH, exist_ok=True)

In [None]:
# Build the PMF reward model on the GPU, then restore its trained weights from
# the checkpoint (tensors are mapped onto the CUDA device while loading).
reward_model = PMF(USERS_NUM, ITEMS_NUM, EMBEDDING_DIM)
reward_model = reward_model.to("cuda")

_trained_weights = torch.load(
    EMBEDDING_NETWORK_WEIGHTS_PATH, map_location=torch.device("cuda")
)
reward_model.load_state_dict(_trained_weights)

# Raw weight tensors of the two embedding tables, exposed for later lookups.
item_embeddings = reward_model.item_embeddings.weight.data
user_embeddings = reward_model.user_embeddings.weight.data

In [None]:
def get_items_emb(_items_ids):
    """Look up entries of the module-level ``item_embeddings`` table.

    Args:
        _items_ids: Index (or collection of indices) used to subscript
            ``item_embeddings``.

    Returns:
        The result of ``item_embeddings[_items_ids]``.
    """
    return item_embeddings[_items_ids]

# Training

In [None]:
# Constructor arguments for the training environment: the training split of the
# users dictionary plus the settings defined at the top of the notebook.
_train_env_kwargs = {
    "users_dict": dataset["train_users_dict"],
    "n_groups": N_GROUPS,
    "item_groups": dataset["item_groups"],
    "items_metadata": dataset["items_metadata"],
    "items_df": dataset["items_df"],
    "state_size": STATE_SIZE,
    "done_count": DONE_COUNT,
    "fairness_constraints": FAIRNESS_CONSTRAINTS,
    "reward_threshold": REWARD_THRESHOLD,
    "reward_version": REWARD_VERSION,
    "user_intent_threshold": USER_INTENT_THRESHOLD,
    "user_intent": USER_INTENT,
    "title_emb_path": dataset["title_emb"],
}

_env_class = ENV[ALGORITHM]
env = _env_class(**_train_env_kwargs)

In [None]:
# Attach the reward model, the item embedding table and the device name to the
# training environment after construction (applied in the order listed).
for _attr_name, _attr_value in (
    ("reward_model", reward_model),
    ("item_embeddings", item_embeddings),
    ("device", "cuda"),
):
    setattr(env, _attr_name, _attr_value)

In [None]:
# Running totals accumulated over all training episodes.
sum_precision = 0
sum_ndcg = 0
sum_propfair = 0
sum_reward = 0

# The constraint weights never change, so build the array once.
fairness_weights = np.array(FAIRNESS_CONSTRAINTS)

for episode in tqdm(range(MAX_EPISODE_NUM)):
    # Per-episode accumulators.
    episode_reward = 0
    steps = 0
    mean_precision = 0
    # NOTE(review): nothing in this loop adds to mean_ndcg, so sum_ndcg stays 0.
    mean_ndcg = 0

    list_recommended_item = []

    # Start a new episode.
    user_id, items_ids, done = env.reset()

    while not done:
        # The state handed to the agent is the current item-id list wrapped in a
        # leading batch axis.
        state = np.array([items_ids])

        # Pick one item, apply it to the environment, then update the policy with
        # the observed reward.
        recommended_item = agent.select_action(state)[0]
        list_recommended_item.append(recommended_item)

        next_items_ids, reward, done, info = env.step(
            recommended_item, top_k=False
        )

        agent.update_params(recommended_item, reward, state)

        items_ids = next_items_ids
        episode_reward += reward
        steps += 1
        mean_precision += info["precision"]

    # An episode that was already done at reset contributes nothing (and would
    # otherwise divide by zero below).
    if steps == 0:
        continue

    # Proportional fairness of the finished episode:
    # sum_g w_g * log(1 + count_g / total_count), or 0 when nothing was exposed.
    group_counts = np.array(env.get_group_count())
    total_exp = np.sum(group_counts)
    propfair = 0
    if total_exp > 0:
        propfair = np.sum(fairness_weights * np.log(1 + group_counts / total_exp))

    sum_precision += mean_precision / steps
    sum_ndcg += mean_ndcg / steps
    sum_propfair += propfair
    sum_reward += episode_reward

In [None]:
# Persist the trained agent as "<policy_name>.pkl" inside SAVE_PATH.
_agent_file = os.path.join(SAVE_PATH, "{}.pkl".format(agent.policy_name))
with open(_agent_file, "wb") as agent_out:
    pickle.dump(agent, agent_out)

# Evaluation

In [None]:
import random

In [None]:
# Two-column frame (item_id, group) built from the item -> group mapping.
item_groups_df = pd.DataFrame(
    dataset["item_groups"].items(), columns=["item_id", "group"]
)
# Distinct item ids; used as the pool for the random-recommendation baseline.
catalog = item_groups_df["item_id"].unique().tolist()

# Cut-offs to evaluate; each value is used as the evaluation env's done_count.
top_k = [10]

# Per-cut-off result collectors, filled in the same order as top_k.
_precision, _propfair, _ufg = [], [], []
_recommended_item, _random_recommended_item = [], []
_exposure = []
_exposure = []
for k in top_k:
    # Totals over all evaluated users for this cut-off.
    sum_precision = 0
    sum_propfair = 0
    sum_reward = 0

    recommended_item = []
    random_recommended_item = []
    exposure = []

    # Constructor arguments shared by the probe environment below and by every
    # per-user environment.
    eval_env_kwargs = dict(
        users_dict=dataset["eval_users_dict"],
        n_groups=N_GROUPS,
        item_groups=dataset["item_groups"],
        items_metadata=dataset["items_metadata"],
        items_df=dataset["items_df"],
        state_size=STATE_SIZE,
        done_count=k,
        fairness_constraints=FAIRNESS_CONSTRAINTS,
        reward_threshold=REWARD_THRESHOLD,
        reward_version=REWARD_VERSION,
        use_only_reward_model=True,
        user_intent_threshold=USER_INTENT_THRESHOLD,
        user_intent=USER_INTENT,
        title_emb_path=dataset["title_emb"],
    )

    # Probe environment: only used to obtain the list of users to evaluate.
    env = ENV[ALGORITHM](**eval_env_kwargs)
    available_users = env.available_users

    fairness_weights = np.array(FAIRNESS_CONSTRAINTS)
    # NOTE(review): averages are taken over every user in eval_users_dict, while
    # the loop iterates over env.available_users. If the latter can be a strict
    # subset, the reported means are biased low -- confirm against OfflineEnv.
    n_eval_users = len(dataset["eval_users_dict"])

    for user_id in tqdm(available_users):
        # Fresh environment pinned to this user, so group counts start from zero.
        eval_env = ENV[ALGORITHM](
            reward_model=reward_model,
            device="cuda",
            fix_user_id=user_id,
            **eval_env_kwargs,
        )

        steps = 0
        mean_precision = 0
        episode_reward = 0
        list_recommended_item = []

        user_id, items_ids, done = eval_env.reset()

        while not done:
            # Greedy roll-out: the agent is queried but never updated here.
            state = np.array([items_ids])
            action = agent.select_action(state)[0]
            list_recommended_item.append(action)

            next_items_ids, reward, done, info = eval_env.step(action)

            items_ids = next_items_ids
            episode_reward += np.sum(reward)
            steps += 1
            mean_precision += info["precision"]

        # A user whose episode is already done at reset has no steps; skip them
        # instead of dividing by zero when averaging precision.
        if steps == 0:
            del eval_env
            continue

        # Exposure share per group and the resulting proportional fairness:
        # sum_g w_g * log(1 + share_g). With no exposure at all, both are zero
        # (rather than NaN from a 0/0 division).
        group_counts = np.array(eval_env.get_group_count())
        total_exp = np.sum(group_counts)
        if total_exp > 0:
            group_exposure = group_counts / total_exp
            propfair = np.sum(fairness_weights * np.log(1 + group_exposure))
        else:
            group_exposure = np.zeros_like(group_counts, dtype=float)
            propfair = 0

        result = {
            "precision": mean_precision / steps,
            "propfair": propfair,
            "reward": episode_reward,
            "recommended_items": {user_id: list_recommended_item},
            "exposure": group_exposure.tolist(),
        }

        recommended_item.append(result["recommended_items"])
        # Random baseline: k items drawn without replacement from the catalog.
        random_recommended_item.append({user_id: sample(catalog, k)})
        exposure.append(result["exposure"])

        sum_precision += result["precision"]
        sum_propfair += result["propfair"]
        sum_reward += result["reward"]

        del eval_env

    precision_at_k = sum_precision / n_eval_users
    propfair_at_k = sum_propfair / n_eval_users

    _precision.append(precision_at_k)
    _propfair.append(propfair_at_k)
    # UFG as computed here: mean propfair divided by (1 - mean precision).
    _ufg.append(propfair_at_k / (1 - precision_at_k))
    _recommended_item.append(recommended_item)
    _random_recommended_item.append(random_recommended_item)
    _exposure.append(exposure)

# Build a DataFrame of item-embedding features for the items in item_groups_df.
# DataFrame.apply passes the whole "item_id" column to the lambda, which looks
# the ids up through get_items_emb, moves the result to the CPU and converts it
# to nested Python lists; the "item_id" entry of that result is then expanded
# into a new frame.
# Presumably this yields one row per item and one column per embedding
# dimension -- TODO confirm the resulting shape.
# NOTE(review): feature_df is not read anywhere else in the visible notebook.
feature_df = pd.DataFrame(
    item_groups_df[["item_id"]]
    .apply(lambda x: get_items_emb(x).cpu().numpy().tolist())[
        "item_id"
    ]
    .tolist()
)

# Final report: one entry per evaluated cut-off, keyed by the cut-off value.
# `k` is the position inside top_k (and inside the per-cut-off collectors).
metrics = {}
for k, _cutoff in enumerate(top_k):
    # One row per evaluated user holding that user's recommendation list.
    _per_user_lists = [
        list(user_recs.values()) for user_recs in _recommended_item[k]
    ]
    recs = pd.DataFrame(_per_user_lists, columns=["sorted_actions"])

    # Mean exposure share per group across users, next to the share implied by
    # the normalised fairness constraints.
    exposure = np.array(_exposure[k]).mean(axis=0)
    ideal_exposure = np.array(FAIRNESS_CONSTRAINTS) / np.sum(
        FAIRNESS_CONSTRAINTS
    )

    # Precision and propfair are reported as percentages.
    metrics[_cutoff] = dict(
        precision=round(_precision[k] * 100, 4),
        propfair=round(_propfair[k] * 100, 4),
        ufg=round(_ufg[k], 4),
        exposure=exposure.tolist(),
        ideal_exposure=ideal_exposure.tolist(),
    )

    # fig = go.Figure()
    # fig.add_trace(
    #     go.Scatter(
    #         x=[i for i in range(1, N_GROUPS + 1)],
    #         y=exposure,
    #         mode="lines+markers",
    #         name="Group Exposure",
    #     )
    # )
    # fig.add_trace(
    #     go.Scatter(
    #         x=[i for i in range(1, N_GROUPS + 1)],
    #         y=ideal_exposure,
    #         mode="lines+markers",
    #         name="Ideal Exposure",
    #     )
    # )
    # fig.update_layout(
    #     title="Group Exposure vs Ideal Exposure",
    #     xaxis_title="Group",
    #     yaxis_title="Exposure",
    # )
    # fig.write_image(
    #     os.path.join(
    #         SAVE_PATH,
    #         "group_exposure_vs_ideal_exposure_{}.png".format(k),
    #     )
    # )

    # recs["user_id"] = [list(i.keys())[0] for i in _recommended_item[k]]
    # recsys_fair = RecsysFair(
    #     df=recs,
    #     supp_metadata=item_groups_df,
    #     user_column="user_id",
    #     item_column="item_id",
    #     reclist_column="sorted_actions",
    # )

    # fair_column = "group"
    # ex = recsys_fair.exposure(fair_column, top_k[k])

    # fig = ex.show(kind="per_group_norm", column=fair_column)
    # fig.write_image(
    #     os.path.join(
    #         SAVE_PATH, "exposure_per_group_k{}.png".format(top_k[k])
    #     )
    # )

    # fig = ex.show(kind="per_rank_pos", column=fair_column)
    # fig.write_image(
    #     os.path.join(
    #         SAVE_PATH, "exposure_per_rank_k{}.png".format(top_k[k])
    #     )
    # )

    # recs.to_csv(
    #     os.path.join(
    #         SAVE_PATH, "recommended_item_k{}.csv".format(top_k[k])
    #     )
    # )
    # item_groups_df.to_csv(
    #     os.path.join(SAVE_PATH, "supp_metadata_k{}.csv".format(top_k[k]))
    # )

# Write the collected metrics next to the saved agent.
_metrics_file = os.path.join(SAVE_PATH, "metrics.json")
with open(_metrics_file, "w") as metrics_out:
    json.dump(metrics, metrics_out)

print("---------- Finish Evaluation")