In [1]:
#Dependencies
import pandas as pd
import numpy as np
import tensorflow as tf
import itertools
import matplotlib.pyplot as plt
import time

from envs1 import OfflineEnv
from recommender import DRRAgent

STATE_SIZE = 10

In [2]:
movies_df=pd.read_csv('./movies.dat')
movies_list=movies_df.values.tolist()
movies_id_to_movies = {movie[0]: movie[1:] for movie in movies_list}
movies_df['MovieID'] = movies_df['MovieID'].apply(pd.to_numeric)
ratings_df=pd.read_csv('./ratings.dat')
ratings_df = ratings_df.applymap(int)

In [3]:
users_dict = np.load('./user_dict.npy', allow_pickle=True)
users_history_lens = np.load('./users_histroy_len.npy')

In [4]:
# 유저별로 본 영화들 순서대로 정리
users_dict = {user : [] for user in set(ratings_df["UserID"])}
users_dict[1]

[]

In [5]:
# sort by time
ratings_df = ratings_df.sort_values(by='Timestamp', ascending=True)
ratings_df.head(5)

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,259,255,4,874724710
1,259,286,4,874724727
2,259,298,4,874724754
3,259,185,4,874724781
4,259,173,4,874724843


In [6]:
# Put (movie, rating) pairs in user dictionary
# Only movies with a rating of 4 or higher are counted for each user's movie history length.
ratings_df_gen = ratings_df.iterrows()
users_dict_for_history_len = {user : [] for user in set(ratings_df["UserID"])}
for data in ratings_df_gen:
    users_dict[data[1]['UserID']].append((data[1]['MovieID'], data[1]['Rating']))
    if data[1]['Rating'] >= 1:
        users_dict_for_history_len[data[1]['UserID']].append((data[1]['MovieID'], data[1]['Rating']))

In [7]:
# Movie history length for each user
users_history_lens = [len(users_dict_for_history_len[u]) for u in set(ratings_df["UserID"])]

In [8]:
len(users_history_lens)

943

In [9]:
users_num = max(ratings_df["UserID"])+1
items_num = max(ratings_df["MovieID"])+1

In [10]:
print(users_num, items_num)

944 1683


### Training setting

In [11]:
train_users_num = int(users_num * 0.8)
train_items_num = items_num
print(train_users_num, train_items_num)

755 1683


In [12]:
train_users_dict = {k:users_dict[k] for k in range(1, train_users_num+1)}
train_users_history_lens = users_history_lens[:train_users_num]
print(len(train_users_dict),len(train_users_history_lens))

755 755


### Evaluating setting

In [13]:
eval_users_num = int(users_num * 0.2)
eval_items_num = items_num
print(eval_users_num, eval_items_num)

188 1683


In [14]:
eval_users_dict = {k:users_dict[k] for k in range(users_num-eval_users_num, users_num)}
eval_users_history_lens = users_history_lens[-eval_users_num:]
print(len(eval_users_dict),len(eval_users_history_lens))

188 188


In [15]:
eval_users_dict

{756: [(258, 3),
  (300, 4),
  (173, 3),
  (1, 4),
  (1149, 5),
  (171, 4),
  (275, 3),
  (1009, 4),
  (30, 4),
  (256, 4),
  (367, 4),
  (8, 4),
  (235, 3),
  (135, 2),
  (731, 3),
  (289, 4),
  (92, 3),
  (1652, 1),
  (527, 3),
  (1274, 2),
  (1119, 4),
  (71, 3),
  (9, 2),
  (22, 3),
  (50, 4),
  (228, 3),
  (96, 4),
  (89, 4),
  (147, 4),
  (403, 2),
  (117, 4),
  (176, 4),
  (210, 4),
  (568, 3),
  (195, 3),
  (118, 2),
  (399, 2),
  (222, 2),
  (230, 3),
  (550, 2),
  (554, 1),
  (121, 3),
  (3, 1),
  (95, 3),
  (99, 3),
  (588, 4),
  (432, 4),
  (501, 3),
  (473, 3),
  (418, 3),
  (1240, 4),
  (420, 4),
  (197, 2),
  (97, 3),
  (274, 3),
  (421, 4),
  (155, 4),
  (111, 4),
  (66, 4),
  (739, 4),
  (88, 1),
  (591, 4),
  (642, 2),
  (234, 3),
  (159, 4),
  (79, 4),
  (742, 3),
  (860, 1),
  (226, 3),
  (566, 4),
  (983, 2),
  (930, 3),
  (123, 2),
  (53, 3),
  (419, 3),
  (151, 4),
  (755, 3),
  (423, 3),
  (82, 3),
  (622, 3),
  (1031, 2),
  (225, 1),
  (138, 2),
  (404, 3),
  (

In [16]:
len(eval_users_dict[756])

109

In [17]:
eval_users_history_lens

[109,
 166,
 357,
 32,
 41,
 66,
 21,
 129,
 109,
 23,
 175,
 37,
 64,
 29,
 64,
 70,
 33,
 161,
 224,
 28,
 105,
 36,
 65,
 37,
 55,
 42,
 232,
 30,
 39,
 27,
 117,
 57,
 249,
 33,
 231,
 25,
 47,
 55,
 39,
 159,
 358,
 26,
 239,
 24,
 28,
 26,
 75,
 33,
 332,
 273,
 140,
 204,
 23,
 20,
 26,
 21,
 20,
 29,
 35,
 183,
 25,
 36,
 21,
 28,
 21,
 62,
 25,
 185,
 20,
 145,
 115,
 28,
 104,
 64,
 106,
 73,
 25,
 267,
 54,
 102,
 50,
 46,
 93,
 57,
 197,
 31,
 27,
 204,
 81,
 31,
 405,
 146,
 150,
 23,
 51,
 209,
 49,
 41,
 217,
 23,
 26,
 22,
 21,
 39,
 71,
 43,
 165,
 107,
 294,
 66,
 20,
 95,
 208,
 47,
 269,
 115,
 71,
 20,
 34,
 89,
 21,
 81,
 134,
 29,
 368,
 259,
 137,
 268,
 43,
 102,
 240,
 171,
 20,
 326,
 119,
 47,
 226,
 59,
 245,
 20,
 362,
 185,
 30,
 135,
 45,
 124,
 49,
 136,
 47,
 40,
 41,
 147,
 74,
 26,
 60,
 98,
 53,
 132,
 23,
 26,
 317,
 35,
 103,
 217,
 26,
 110,
 127,
 74,
 82,
 32,
 20,
 120,
 32,
 49,
 63,
 61,
 241,
 184,
 174,
 39,
 142,
 40,
 108,
 49,
 107,
 22

### Evalutation

In [18]:
def evaluate(recommender, env, check_movies = False, top_k=False, length = False):



   # episodic reward
    mean_precision = 0
    mean_ndcg = 0



 # episodic reward
    episode_reward = 0
    steps = 0
    q_loss1 = 0
    q_loss2 = 0
    countl = 0
    correct_list = []
    
    # Environment
    user_id, items_ids, done = env.reset()



    while not done:
#         print("user_id :",user_id)
        # Observe current state & Find action
        ## Embedding
        user_eb = recommender.embedding_network.get_layer('user_embedding')(np.array(user_id))
        items_eb = recommender.embedding_network.get_layer('movie_embedding')(np.array(items_ids))
        ## SRM state
        state = recommender.srm_ave([np.expand_dims(user_eb, axis=0), np.expand_dims(items_eb, axis=0)])
        ## Action(ranking score)
        action1 = recommender.actor.network(state)
        action2 = recommender.actor2.network(state)
        
        action = (action1 + action2)/2
        
        
        ## Item
        recommended_item = recommender.recommend_item(action, env.recommended_items, top_k=top_k)



        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)
         
#         print("done :",done)



        if countl < length:
            countl += 1
#             print("countl :",countl)
            correct_list.append(reward)
            if done == True:
                dcg, idcg = calculate_ndcg(correct_list, [1 for _ in range(len(correct_list))])
#                 print("dcg :", dcg, "idcg :", idcg)
                mean_ndcg += dcg/idcg
#                 print("mean_ndcg :",mean_ndcg)



               #precision
                correct_list1 = [1 if r > 0 else 0 for r in correct_list]
                correct_num = length-correct_list1.count(0)
                mean_precision += correct_num/length
                   
        items_ids = next_items_ids
        episode_reward += reward
        steps += 1
#     print(mean_precision, mean_ndcg, reward)    
    return mean_precision, mean_ndcg, reward



def calculate_ndcg(rel, irel):
    dcg = 0
    idcg = 0
    rel = [1 if r>0 else 0 for r in rel]
    for i, (r, ir) in enumerate(zip(rel, irel)):
        dcg += (r)/np.log2(i+2)
        idcg += (ir)/np.log2(i+2)
    return dcg, idcg

In [19]:
tf.keras.backend.set_floatx('float64')

In [20]:
sum_precision = 0
sum_ndcg = 0
sum_reward = 0
for i in range(20,27):
    sum_precision = 0
    sum_ndcg = 0
    sum_reward = 0
    length_k = 10
    for user_id in eval_users_dict.keys():
        env = OfflineEnv(eval_users_dict, users_history_lens, movies_id_to_movies, STATE_SIZE, fix_user_id=user_id)
        recommender = DRRAgent(env, users_num, items_num, STATE_SIZE)
        recommender.actor.build_networks()
        recommender.actor2.build_networks()
        recommender.critic.build_networks()
        recommender.critic2.build_networks()
        recommender.load_model(f"./save_weights/actor_{i*500}_fixed.h5",
                               f"./save_weights/actor2_{i*500}_fixed.h5",
                               f"./save_weights/critic_{i*500}_fixed.h5",
                              f"./save_weights/critic2_{i*500}_fixed.h5")
        precision, ndcg, reward = evaluate(recommender, env, top_k= False, length = length_k)
        sum_precision += precision
        sum_ndcg += ndcg
        sum_reward += reward



    print(f'Model: {i*500}, reward:{sum_reward}, precision@{length_k} : {sum_precision/len(eval_users_dict)}, ndcg@{length_k} : {sum_ndcg/len(eval_users_dict)}')

Model: 10000, reward:-37.0, precision@10 : 0.2127659574468086, ndcg@10 : 0.22215694418857732
Model: 10500, reward:-20.0, precision@10 : 0.23723404255319164, ndcg@10 : 0.2483131347460613
Model: 11000, reward:-38.0, precision@10 : 0.22180851063829796, ndcg@10 : 0.2263519215807306
Model: 11500, reward:-3.0, precision@10 : 0.22712765957446832, ndcg@10 : 0.23633169693736114
Model: 12000, reward:-18.5, precision@10 : 0.2021276595744682, ndcg@10 : 0.21316891201818405
Model: 12500, reward:-3.0, precision@10 : 0.2095744680851065, ndcg@10 : 0.21849778442700798
Model: 13000, reward:-18.5, precision@10 : 0.21861702127659602, ndcg@10 : 0.22119942337080908
