In [1]:
#Dependencies
import pandas as pd
import numpy as np
import tensorflow as tf
import itertools
import matplotlib.pyplot as plt
import time

from src.environment.ml_env import OfflineEnv
from src.model.recommender import DRRAgent

import os

# Presumably the directory holding the raw MovieLens-1M files — TODO confirm.
# It is not read by any of the cells visible in this notebook.
DATA_DIR = "data/ml-1m"
# Forwarded to both OfflineEnv and DRRAgent when they are constructed below.
# NOTE(review): presumably the number of recent items that make up a state — confirm.
STATE_SIZE = 10

In [13]:
import json 
import pickle

dataset_path = "data/movie_lens_output_path.json"
# The JSON index maps each dataset name used below to the path of its pickle file.
with open(dataset_path, encoding="utf-8") as json_file:
    _dataset_path = json.load(json_file)


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserialising. Only point this at files produced by this project's own
    preprocessing step, never at untrusted downloads.
    """
    with open(path, "rb") as pkl_file:
        return pickle.load(pkl_file)


# One load per dataset; a missing key in the index raises KeyError right here
# rather than surfacing later as an undefined variable.
train_users_dict = _load_pickle(_dataset_path["train_users_dict"])
train_users_history_lens = _load_pickle(_dataset_path["train_users_history_lens"])
eval_users_dict = _load_pickle(_dataset_path["eval_users_dict"])
eval_users_history_lens = _load_pickle(_dataset_path["eval_users_history_lens"])
users_history_lens = _load_pickle(_dataset_path["users_history_lens"])
movies_id_to_movies = _load_pickle(_dataset_path["movies_id_to_movies"])
movies_groups = _load_pickle(_dataset_path["movies_groups"])

In [3]:
# Sizes handed to DRRAgent when the agent is constructed in the evaluation cell.
# NOTE(review): presumably (max user id + 1) and (max movie id + 1) for
# MovieLens-1M so that raw ids can be used as indices — confirm against the
# preprocessing code.
users_num = 6041
items_num = 3953

### Training setting

In [4]:
# 80% of users_num, truncated towards zero by int(); the item count is unchanged.
# These two values are only printed here; no other visible cell reads them.
train_users_num = int(users_num * 0.8)
train_items_num = items_num
print(train_users_num, train_items_num)

4832 3953


### Evaluating setting

In [5]:
# 20% of users_num, truncated towards zero by int(); the item count is unchanged.
# NOTE(review): the final averages below divide by len(eval_users_dict), not by
# eval_users_num, so this value is informational only in the visible cells.
eval_users_num = int(users_num * 0.2)
eval_items_num = items_num
print(eval_users_num, eval_items_num)

1208 3953


### Evaluation

In [6]:
def evaluate(recommender, env, check_movies = False, top_k=False, max_steps=1):
    """Run ``recommender`` against ``env`` and return mean precision and NDCG.

    Args:
        recommender: agent exposing ``embedding_network`` (with
            ``'user_embedding'`` and ``'movie_embedding'`` layers), ``srm_ave``,
            ``actor.network`` and ``recommend_item``.
        env: environment exposing ``reset()``, ``step()`` and
            ``recommended_items``; ``user_items`` and ``get_items_names`` are
            additionally used when ``check_movies`` is true.
        check_movies: when true, print the user, the recommended items and the
            per-step / overall metrics.
        top_k: number of items recommended per step. When falsy, a single-item
            recommendation is requested and precision/NDCG are not accumulated
            (both are returned as ``0.0``).
        max_steps: maximum number of environment steps to evaluate. The default
            of ``1`` keeps the long-standing behaviour of scoring only the first
            recommendation of each episode; pass ``None`` to run until the
            environment reports ``done``.

    Returns:
        ``(mean_precision, mean_ndcg)`` averaged over the steps taken, or
        ``(0.0, 0.0)`` when no step could be taken.
    """
    episode_reward = 0
    steps = 0
    mean_precision = 0
    mean_ndcg = 0

    # Environment
    user_id, items_ids, done = env.reset()
    if check_movies:
        print(f'user_id : {user_id}, rated_items_length:{len(env.user_items)}')
        print('items : \n', np.array(env.get_items_names(items_ids)))

    embedding_network = recommender.embedding_network
    while not done and (max_steps is None or steps < max_steps):
        # Observe current state & find action
        ## Embedding
        user_eb = embedding_network.get_layer('user_embedding')(np.array(user_id))
        items_eb = embedding_network.get_layer('movie_embedding')(np.array(items_ids))
        ## SRM state
        state = recommender.srm_ave([np.expand_dims(user_eb, axis=0), np.expand_dims(items_eb, axis=0)])
        ## Action (ranking score)
        action = recommender.actor.network(state)
        ## Item
        recommended_item = recommender.recommend_item(action, env.recommended_items, top_k=top_k)
        if check_movies:
            print(f'recommended items ids : {recommended_item}')
            print(f'recommened items : \n {np.array(env.get_items_names(recommended_item), dtype=object)}')

        # Calculate reward and observe new state (in env)
        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)

        # Ranking metrics only exist for list recommendations; the per-step
        # report stays empty otherwise so the debug print below never touches
        # undefined names.
        metrics_report = ''
        if top_k:
            correct_list = [1 if r > 0 else 0 for r in reward]
            dcg, idcg = calculate_ndcg(correct_list, [1 for _ in range(len(reward))])
            # idcg is 0 only when no rewards came back; score that step as 0.
            ndcg = dcg / idcg if idcg > 0 else 0.0
            # Count hits directly so a reward list shorter than top_k is not
            # credited with the missing positions.
            precision = sum(correct_list) / top_k
            mean_ndcg += ndcg
            mean_precision += precision
            metrics_report = (
                f'precision : {precision}, dcg : {dcg:0.3f}, '
                f'idcg : {idcg:0.3f}, ndcg : {ndcg:0.3f}, '
            )

        reward = np.sum(reward)
        items_ids = next_items_ids
        episode_reward += reward
        steps += 1

        if check_movies:
            print(f'{metrics_report}reward : {reward}')
            print()

    # The environment was already done at reset (or max_steps == 0): there is
    # nothing to average over.
    if steps == 0:
        if check_movies:
            print(f'precision : {0.0}, ngcg : {0.0}, episode_reward : {episode_reward}')
            print()
        return 0.0, 0.0

    if check_movies:
        print(f'precision : {mean_precision/steps}, ngcg : {mean_ndcg/steps}, episode_reward : {episode_reward}')
        print()

    return mean_precision/steps, mean_ndcg/steps

def calculate_ndcg(rel, irel):
    """Return ``(dcg, idcg)`` for a ranked list using a log2 position discount.

    ``rel`` is binarised first (any value greater than 0 counts as a hit).
    ``irel`` supplies the reference gains used for ``idcg`` and is taken as
    given. Positions are paired with ``zip``, so the shorter input decides how
    many ranks are scored. The division ``dcg / idcg`` is left to the caller.
    """
    hits = [int(score > 0) for score in rel]
    # Rank 1 is discounted by log2(2) == 1, rank 2 by log2(3), and so on.
    discounted = [
        (hit / np.log2(rank), ideal / np.log2(rank))
        for rank, (hit, ideal) in enumerate(zip(hits, irel), start=2)
    ]
    dcg = sum(gain for gain, _ in discounted)
    idcg = sum(ideal_gain for _, ideal_gain in discounted)
    return dcg, idcg

In [7]:
# Switch the Keras default float type to float64 before the agent networks are
# built in the evaluation cell below.
tf.keras.backend.set_floatx('float64')

In [14]:
# Running totals of the per-user metrics returned by evaluate().
sum_precision = 0
sum_ndcg = 0
# Forwarded to evaluate() as top_k and used in the label of the final report.
TOP_K = 10

# Evaluate every user in eval_users_dict with a fresh environment pinned to
# that user via fix_user_id.
# NOTE(review): the full users_history_lens is passed here, not
# eval_users_history_lens (which is loaded above but unused in the visible
# cells) — confirm this is intended.
# NOTE(review): the agent is rebuilt and its weights reloaded on every
# iteration. If DRRAgent does not depend on the per-user env beyond what
# evaluate() passes explicitly, constructing it once outside the loop would
# avoid the repeated work — confirm before changing.
for user_id in eval_users_dict.keys():
    env = OfflineEnv(
        eval_users_dict, 
        users_history_lens, 
        movies_id_to_movies, 
        movies_groups, 
        STATE_SIZE, 
        fix_user_id=user_id
    )
    recommender = DRRAgent(
        env, 
        users_num, 
        items_num, 
        STATE_SIZE,
        "model/movie_lens",
        "model/movie_lens/user_movie_at_once.h5",
        "movie_lens"
    )
    # Networks are built before load_model() is called with the saved
    # actor/critic weight files.
    recommender.actor.build_networks()
    recommender.critic.build_networks()
    recommender.load_model('model/movie_lens/actor_8000.h5', 
                           'model/movie_lens/critic_8000.h5')
    # evaluate() is called with its default settings apart from top_k.
    precision, ndcg = evaluate(recommender, env, top_k=TOP_K)
    sum_precision += precision
    sum_ndcg += ndcg
    
# Report the per-user averages over the whole evaluation split.
print(f'precision@{TOP_K} : {sum_precision/len(eval_users_dict)}, ndcg@{TOP_K} : {sum_ndcg/len(eval_users_dict)}')

precision@10 : 0.40149006622516603, ndcg@10 : 0.3890252611393004
