In [1]:
#Dependencies
import pandas as pd
import numpy as np
import tensorflow as tf
import itertools
import matplotlib.pyplot as plt
import time

from envs1 import OfflineEnv
from recommender import DRRAgent

STATE_SIZE = 10

In [2]:
movies_df=pd.read_csv('./movies.dat')
movies_list=movies_df.values.tolist()
movies_id_to_movies = {movie[0]: movie[1:] for movie in movies_list}
movies_df['MovieID'] = movies_df['MovieID'].apply(pd.to_numeric)
ratings_df=pd.read_csv('./ratings.dat')
ratings_df = ratings_df.applymap(int)

In [3]:
users_dict = np.load('./user_dict.npy', allow_pickle=True)
users_history_lens = np.load('./users_histroy_len.npy')

In [4]:
# 유저별로 본 영화들 순서대로 정리
users_dict = {user : [] for user in set(ratings_df["UserID"])}
users_dict[1]

[]

In [5]:
# sort by time
ratings_df = ratings_df.sort_values(by='Timestamp', ascending=True)
ratings_df.head(5)

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,259,255,4,874724710
1,259,286,4,874724727
2,259,298,4,874724754
3,259,185,4,874724781
4,259,173,4,874724843


In [6]:
# Put (movie, rating) pairs in user dictionary
# Only movies with a rating of 4 or higher are counted for each user's movie history length.
ratings_df_gen = ratings_df.iterrows()
users_dict_for_history_len = {user : [] for user in set(ratings_df["UserID"])}
for data in ratings_df_gen:
    users_dict[data[1]['UserID']].append((data[1]['MovieID'], data[1]['Rating']))
    if data[1]['Rating'] >= 1:
        users_dict_for_history_len[data[1]['UserID']].append((data[1]['MovieID'], data[1]['Rating']))

In [7]:
# Movie history length for each user
users_history_lens = [len(users_dict_for_history_len[u]) for u in set(ratings_df["UserID"])]

In [8]:
len(users_history_lens)

943

In [9]:
users_num = max(ratings_df["UserID"])+1
items_num = max(ratings_df["MovieID"])+1

In [10]:
print(users_num, items_num)

944 1683


### Training setting

In [11]:
train_users_num = int(users_num * 0.8)
train_items_num = items_num
print(train_users_num, train_items_num)

755 1683


In [12]:
train_users_dict = {k:users_dict[k] for k in range(1, train_users_num+1)}
train_users_history_lens = users_history_lens[:train_users_num]
print(len(train_users_dict),len(train_users_history_lens))

755 755


### Evaluating setting

In [13]:
eval_users_num = int(users_num * 0.2)
eval_items_num = items_num
print(eval_users_num, eval_items_num)

188 1683


In [14]:
eval_users_dict = {k:users_dict[k] for k in range(users_num-eval_users_num, users_num)}
eval_users_history_lens = users_history_lens[-eval_users_num:]
print(len(eval_users_dict),len(eval_users_history_lens))

188 188


### Evalutation

In [15]:
def evaluate(recommender, env, check_movies = False, top_k=False, length = False):

    # episodic reward 
    mean_precision = 0
    mean_ndcg = 0

  # episodic reward
    episode_reward = 0
    steps = 0
    q_loss1 = 0
    q_loss2 = 0
    countl = 0
    correct_list = []
    
    # Environment 
    user_id, items_ids, done = env.reset()

    while not done:
#         print("user_id :",user_id)
        # Observe current state & Find action
        ## Embedding
        user_eb = recommender.embedding_network.get_layer('user_embedding')(np.array(user_id))
        items_eb = recommender.embedding_network.get_layer('movie_embedding')(np.array(items_ids))
        ## SRM state 
        state = recommender.srm_ave([np.expand_dims(user_eb, axis=0), np.expand_dims(items_eb, axis=0)])
        ## Action(ranking score) 
        action1 = recommender.actor.network(state)
        action2 = recommender.actor2.network(state)
        
        q11 = recommender.critic.network([action1, state])
        q12 = recommender.critic2.network([action2, state])

        action = action1 if q11 >= q12 else action2
        
        
        ## Item 
        recommended_item = recommender.recommend_item(action, env.recommended_items, top_k=top_k)

        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)
        
#         print("done :",done)

        if countl < length:
            countl += 1
#             print("countl :",countl)
            correct_list.append(reward)
            if done == True or countl == length:
                dcg, idcg = calculate_ndcg(correct_list, [1 for _ in range(len(correct_list))])
#                 print("dcg :", dcg, "idcg :", idcg)
                mean_ndcg += dcg/idcg
#                 print("mean_ndcg :",mean_ndcg)

                #precision
                correct_list1 = [1 if r > 0 else 0 for r in correct_list]
                correct_num = length-correct_list1.count(0)
                mean_precision += correct_num/length
                   
        items_ids = next_items_ids
        episode_reward += reward
        steps += 1
        
    return mean_precision, mean_ndcg, reward

def calculate_ndcg(rel, irel):
    dcg = 0
    idcg = 0
    rel = [1 if r>0 else 0 for r in rel]
    for i, (r, ir) in enumerate(zip(rel, irel)):
        dcg += (r)/np.log2(i+2)
        idcg += (ir)/np.log2(i+2)
    return dcg, idcg

In [16]:
tf.keras.backend.set_floatx('float64')

In [17]:
sum_precision = 0
sum_ndcg = 0
sum_reward = 0
for i in range(1,41):
    sum_precision = 0
    sum_ndcg = 0
    sum_reward = 0
    length_k = 5
    for user_id in eval_users_dict.keys():
        env = OfflineEnv(eval_users_dict, users_history_lens, movies_id_to_movies, STATE_SIZE, fix_user_id=user_id)
        recommender = DRRAgent(env, users_num, items_num, STATE_SIZE)
        recommender.actor.build_networks()
        recommender.actor2.build_networks()
        recommender.critic.build_networks()
        recommender.critic2.build_networks()
        recommender.load_model(f"./save_weights/actor_{i*500}_fixed.h5", 
                               f"./save_weights/actor2_{i*500}_fixed.h5", 
                               f"./save_weights/critic_{i*500}_fixed.h5",
                              f"./save_weights/critic2_{i*500}_fixed.h5")
        precision, ndcg, reward = evaluate(recommender, env, top_k= False, length = length_k)
        sum_precision += precision
        sum_ndcg += ndcg
        sum_reward += reward

    print(f'Model: {i*500}, reward:{sum_reward}, precision@{length_k} : {sum_precision/len(eval_users_dict)}, ndcg@{length_k} : {sum_ndcg/len(eval_users_dict)}')

Model: 500, reward:-39.0, precision@5 : 0.1851063829787233, ndcg@5 : 0.1956842090837017
Model: 1000, reward:-20.5, precision@5 : 0.21489361702127663, ndcg@5 : 0.22436299053649636
Model: 1500, reward:-15.5, precision@5 : 0.2553191489361703, ndcg@5 : 0.27831006952921034
Model: 2000, reward:-12.0, precision@5 : 0.25851063829787246, ndcg@5 : 0.27417933604953676
Model: 2500, reward:6.5, precision@5 : 0.2957446808510639, ndcg@5 : 0.31150673905266857
Model: 3000, reward:-14.5, precision@5 : 0.2819148936170215, ndcg@5 : 0.2945260671099737
Model: 3500, reward:1.0, precision@5 : 0.272340425531915, ndcg@5 : 0.2823853482572797
Model: 4000, reward:-12.0, precision@5 : 0.28829787234042553, ndcg@5 : 0.30595426944210047
Model: 4500, reward:-9.0, precision@5 : 0.23829787234042568, ndcg@5 : 0.2462435267428693
Model: 5000, reward:-27.0, precision@5 : 0.2276595744680851, ndcg@5 : 0.2394783401179719
Model: 5500, reward:-19.5, precision@5 : 0.25000000000000006, ndcg@5 : 0.2654594830636648
Model: 6000, rewar