In [1]:
#Dependencies
import pandas as pd
import numpy as np
import tensorflow as tf
import itertools
import matplotlib.pyplot as plt
import time
import math

from src.environment.ml_env import OfflineEnv
from src.environment.ml_fair_env import OfflineEnv as OfflineFairEnv
from src.model.recommender import DRRAgent, FairRecAgent

import os

# Number passed as the state-size argument to the environment and the agent
# constructors in the evaluation cell.
STATE_SIZE = 5
# Directory of the ml-100k data files.
DATA_DIR = "data/ml-100k"

In [2]:
import json 
import pickle

# The JSON file maps artefact names to the pickle files that hold them.
dataset_path = "data/movie_lens_100k_output_path.json"
with open(dataset_path) as json_file:
    _dataset_path = json.load(json_file)


def _load_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    # NOTE: pickle.load can execute arbitrary code while deserialising;
    # only point this at files you produced yourself.
    with open(path, "rb") as pkl_file:
        return pickle.load(pkl_file)


train_users_dict = _load_pickle(_dataset_path["train_users_dict"])
train_users_history_lens = _load_pickle(_dataset_path["train_users_history_lens"])
eval_users_dict = _load_pickle(_dataset_path["eval_users_dict"])
eval_users_history_lens = _load_pickle(_dataset_path["eval_users_history_lens"])
users_history_lens = _load_pickle(_dataset_path["users_history_lens"])
movies_id_to_movies = _load_pickle(_dataset_path["movies_id_to_movies"])
movies_groups = _load_pickle(_dataset_path["movies_groups"])

In [3]:
# User and item counts; both are handed to the agent constructor in the
# evaluation cell (presumably the ml-100k totals — confirm against the data).
users_num, items_num = 943, 1682

### Training setting

In [4]:
# Training split: 80% of the users (fraction dropped by int) and every item.
train_items_num = items_num
train_users_num = int(0.8 * users_num)
print(train_users_num, train_items_num)

754 1682


### Evaluation setting

In [5]:
# Evaluation split: 20% of the users (fraction dropped by int) and every item.
# NOTE(review): with users_num = 943 the two truncated splits are 754 and 188,
# which add up to 942 — confirm which split the remaining user belongs to.
eval_items_num = items_num
eval_users_num = int(0.2 * users_num)
print(eval_users_num, eval_items_num)

188 1682


### Evaluation

In [6]:
def evaluate(recommender, env, top_k=False):
    """Roll out one evaluation episode and return step-averaged metrics.

    At every step the current user and item ids are embedded, turned into a
    state by ``recommender.srm_ave``, scored by the actor network, and the
    resulting recommendation is fed to ``env.step``.

    Args:
        recommender: Agent exposing ``embedding_network``, ``srm_ave``,
            ``actor.network`` and ``recommend_item``.
        env: Environment exposing ``reset``, ``step``, ``recommended_items``
            and a ``group_count`` mapping keyed by group ids 1..10. Group ids
            missing from ``group_count`` are inserted with a count of 0.
        top_k: Number of items recommended per step. When falsy, the
            recommendation and its reward are treated as a single item
            (assumes ``env.step`` then returns a scalar reward — TODO
            confirm); precision and NDCG are only accumulated when ``top_k``
            is truthy and stay 0 otherwise.

    Returns:
        Tuple ``(precision, ndcg, propfair, cvr, ufg)``, each averaged over
        the steps of the episode. All zeros when the episode is already done
        right after ``env.reset()``.
    """
    # One equal weight per group id 1..10, used in the propfair sum below.
    group_weights = [0.1] * 10

    steps = 0
    mean_precision = 0
    mean_ndcg = 0
    mean_cvr = 0
    mean_propfair = 0
    mean_ufg = 0

    # Environment
    user_id, items_ids, done = env.reset()
    while not done:
        # Observe current state & find action
        ## Embedding
        user_eb = recommender.embedding_network.get_layer('user_embedding')(np.array(user_id))
        items_eb = recommender.embedding_network.get_layer('movie_embedding')(np.array(items_ids))

        ## SRM state
        state = recommender.srm_ave([np.expand_dims(user_eb, axis=0), np.expand_dims(items_eb, axis=0)])
        ## Action (ranking score)
        action = recommender.actor.network(state)
        ## Item
        recommended_item = recommender.recommend_item(action, env.recommended_items, top_k=top_k)

        # Calculate reward and observe new state (in env)
        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)

        if top_k:
            correct_list = [1 if r > 0 else 0 for r in reward]
            # ndcg against an ideal list in which every slot is a hit
            dcg, idcg = calculate_ndcg(correct_list, [1 for _ in range(len(reward))])
            mean_ndcg += dcg/idcg

            # precision: count the hits directly so a reward list shorter
            # than top_k is not credited with phantom hits
            correct_num = sum(correct_list)
            mean_precision += correct_num/top_k
            recommended_count = len(recommended_item)
        else:
            # Single-item mode: one recommendation, counted as a hit when its
            # reward is positive. Without this branch the metrics below hit an
            # unbound ``correct_num`` and ``len()`` of a non-sequence.
            correct_num = 1 if np.sum(reward) > 0 else 0
            recommended_count = 1

        items_ids = next_items_ids
        steps += 1

        # Weighted sum of log(1 + group_count / recommended_count) over groups.
        propfair = 0
        for group, weight in enumerate(group_weights):
            _group = group + 1
            if _group not in env.group_count:
                env.group_count[_group] = 0

            propfair += weight * math.log(
                1 + (env.group_count[_group] / recommended_count)
            )

        cvr = correct_num / recommended_count
        # Floor the denominator at 0.01 so cvr == 1 does not divide by zero.
        ufg = propfair / max(1 - cvr, 0.01)

        mean_propfair += propfair
        mean_cvr += cvr
        mean_ufg += ufg

    if steps == 0:
        # Nothing was recommended; avoid dividing by zero steps.
        return 0.0, 0.0, 0.0, 0.0, 0.0

    return mean_precision/steps, mean_ndcg/steps, mean_propfair/steps, mean_cvr/steps, mean_ufg/steps

def calculate_ndcg(rel, irel):
    """Return the ``(dcg, idcg)`` pair for a ranked list.

    ``rel`` is binarised first (positive -> 1, otherwise 0); ``irel`` is used
    as given. Position ``i`` (0-based) is discounted by ``log2(i + 2)``, and
    only as many positions as the shorter of the two inputs are summed.
    """
    hits = [1 if score > 0 else 0 for score in rel]
    dcg, idcg = 0, 0
    for rank, (hit, ideal) in enumerate(zip(hits, irel), start=2):
        discount = np.log2(rank)
        dcg += hit / discount
        idcg += ideal / discount
    return dcg, idcg

In [7]:
def evaluate_fair(recommender, env, top_k=False):
    """Roll out one evaluation episode for the fairness-aware agent.

    The state fed to ``recommender.srm_ave`` is built from three parts: the
    embeddings of the current items, for each current item the embeddings of
    every movie in the same group, and the per-group share
    ``group_count / len(env.recommended_items)``.

    Args:
        recommender: Agent exposing ``embedding_network``, ``srm_ave``,
            ``actor.network``, ``recommend_item`` and ``n_groups``.
        env: Environment exposing ``reset``, ``step``, ``recommended_items``,
            ``movies_groups`` (movie id -> group) and a ``group_count``
            mapping. Group ids missing from ``group_count`` are inserted with
            a count of 0.
        top_k: Number of items recommended per step. When falsy, the
            recommendation and its reward are treated as a single item
            (assumes ``env.step`` then returns a scalar reward — TODO
            confirm); precision and NDCG are only accumulated when ``top_k``
            is truthy and stay 0 otherwise.

    Returns:
        Tuple ``(precision, ndcg, propfair, cvr, ufg)``, each averaged over
        the steps of the episode. All zeros when the episode is already done
        right after ``env.reset()``.
    """
    # One equal weight per group id 1..10, used in the propfair sum below.
    group_weights = [0.1] * 10

    steps = 0
    mean_precision = 0
    mean_ndcg = 0
    mean_cvr = 0
    mean_propfair = 0
    mean_ufg = 0

    # Environment (the user id returned by reset is not needed here)
    _user_id, items_ids, done = env.reset()
    while not done:
        # Observe current state & find action
        ## Embedding
        items_eb = recommender.embedding_network.get_layer("movie_embedding")(
            np.array(items_ids)
        )

        # For each current item, embed every movie sharing its group; the
        # ``k - 1`` shifts movie ids to embedding indices (presumably ids are
        # 1-based — confirm).
        groups_eb = []
        for items in items_ids:
            groups_eb.append(
                recommender.embedding_network.get_layer("movie_embedding")(
                    np.array(
                        [
                            k - 1
                            for k, v in env.movies_groups.items()
                            if v == env.movies_groups[items]
                        ]
                    )
                )
            )

        # Share of already recommended items that falls into each group.
        fairness_allocation = []
        for group in range(recommender.n_groups):
            _group = group + 1
            if _group not in env.group_count:
                env.group_count[_group] = 0
            fairness_allocation.append(
                env.group_count[_group] / len(env.recommended_items)
            )

        ## SRM state
        state = recommender.srm_ave(
            [
                np.expand_dims(items_eb, axis=0),
                groups_eb,
                np.expand_dims(fairness_allocation, axis=0),
            ]
        )
        ## Action (ranking score)
        action = recommender.actor.network(state)
        ## Item
        recommended_item = recommender.recommend_item(action, env.recommended_items, top_k=top_k)

        # Calculate reward and observe new state (in env)
        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)

        if top_k:
            correct_list = [1 if r > 0 else 0 for r in reward]
            # ndcg against an ideal list in which every slot is a hit
            dcg, idcg = calculate_ndcg(correct_list, [1 for _ in range(len(reward))])
            mean_ndcg += dcg/idcg

            # precision: count the hits directly so a reward list shorter
            # than top_k is not credited with phantom hits
            correct_num = sum(correct_list)
            mean_precision += correct_num/top_k
            recommended_count = len(recommended_item)
        else:
            # Single-item mode: one recommendation, counted as a hit when its
            # reward is positive. Without this branch the metrics below hit an
            # unbound ``correct_num`` and ``len()`` of a non-sequence.
            correct_num = 1 if np.sum(reward) > 0 else 0
            recommended_count = 1

        items_ids = next_items_ids
        steps += 1

        # Weighted sum of log(1 + group_count / recommended_count) over groups.
        propfair = 0
        for group, weight in enumerate(group_weights):
            _group = group + 1
            if _group not in env.group_count:
                env.group_count[_group] = 0

            propfair += weight * math.log(
                1 + (env.group_count[_group] / recommended_count)
            )

        cvr = correct_num / recommended_count
        # Floor the denominator at 0.01 so cvr == 1 does not divide by zero.
        ufg = propfair / max(1 - cvr, 0.01)

        mean_propfair += propfair
        mean_cvr += cvr
        mean_ufg += ufg

    if steps == 0:
        # Nothing was recommended; avoid dividing by zero steps.
        return 0.0, 0.0, 0.0, 0.0, 0.0

    return mean_precision/steps, mean_ndcg/steps, mean_propfair/steps, mean_cvr/steps, mean_ufg/steps

In [8]:
# Set Keras' default float type to float64; this runs before the agent's
# networks are built in the evaluation cell.
tf.keras.backend.set_floatx('float64')

In [9]:
TOP_K = 10
# Ten equal weights handed to both the environment and the agent (presumably
# one per item group — confirm against OfflineFairEnv / FairRecAgent). Each
# constructor receives its own list copy.
FAIRNESS_WEIGHTS = (0.1,) * 10

# Running totals of the per-user metrics returned by evaluate_fair.
sum_precision = sum_ndcg = sum_propfair = sum_cvr = sum_ufg = 0

for user_id in eval_users_dict:
    # Fresh environment pinned to this user. The original cell noted
    # OfflineEnv as the alternative environment class here.
    env = OfflineFairEnv(
        eval_users_dict,
        users_history_lens,
        movies_id_to_movies,
        movies_groups,
        STATE_SIZE,
        list(FAIRNESS_WEIGHTS),
        fix_user_id=user_id,
    )
    # Fresh agent bound to that environment. The original cell noted DRRAgent
    # as the alternative class, with the "model/movie_lens_100k/" paths; the
    # other values it recorded are kept next to the arguments below.
    recommender = FairRecAgent(
        env,
        users_num,
        items_num,
        STATE_SIZE,
        6,  # alternative noted: 3
        "model/movie_lens_100k_fair/",
        "model/movie_lens_100k_fair/user_movie_at_once.h5",
        "movie_lens",
        True,
        False,
        50,
        128,  # alternative noted: 512
        0.001,
        128,  # alternative noted: 512
        0.001,
        0.9,
        0.001,
        1000000,
        32,  # alternative noted: 64
        10,
        list(FAIRNESS_WEIGHTS),
    )
    recommender.actor.build_networks()
    recommender.critic.build_networks()
    recommender.load_model(
        'model/movie_lens_100k_fair/actor_15000.h5',
        'model/movie_lens_100k_fair/critic_15000.h5',
    )

    precision, ndcg, propfair, cvr, ufg = evaluate_fair(recommender, env, top_k=TOP_K)
    sum_precision += precision
    sum_ndcg += ndcg
    sum_propfair += propfair
    sum_cvr += cvr
    sum_ufg += ufg

# Report each metric averaged over the evaluated users.
print(f'precision@{TOP_K} : {round(sum_precision/len(eval_users_dict), 4)}, ndcg@{TOP_K} : {round(sum_ndcg/len(eval_users_dict), 4)}')
print(f'PropFair : {round(sum_propfair/len(eval_users_dict), 4)}, CVR : {round(sum_cvr/len(eval_users_dict), 4)}, UFG : {round(sum_ufg/len(eval_users_dict), 4)}')

precision@10 : 0.4322, ndcg@10 : 0.4182
PropFair : 0.2479, CVR : 0.4322, UFG : 1.1064
