In [1]:
# Clone the project repository and copy its contents into /content,
# presumably so the local modules imported below (`envs`, `recommender`) are importable -- TODO confirm.
!git clone https://github.com/pkulwj1994/RL_Reco.git
!cp -a /content/RL_Reco/. /content/.

Cloning into 'Recommender_system_via_deep_RL'...
remote: Enumerating objects: 400, done.[K
remote: Counting objects: 100% (400/400), done.[K
remote: Compressing objects: 100% (286/286), done.[K
remote: Total 400 (delta 213), reused 287 (delta 111), pack-reused 0[K
Receiving objects: 100% (400/400), 58.37 MiB | 27.56 MiB/s, done.
Resolving deltas: 100% (213/213), done.


In [2]:
!pip install wandb
!wget https://files.grouplens.org/datasets/movielens/ml-1m.zip
!unzip ./ml-1m.zip

Collecting wandb
  Downloading wandb-0.12.9-py2.py3-none-any.whl (1.7 MB)
[?25l[K     |▏                               | 10 kB 36.5 MB/s eta 0:00:01[K     |▍                               | 20 kB 30.7 MB/s eta 0:00:01[K     |▋                               | 30 kB 18.5 MB/s eta 0:00:01[K     |▊                               | 40 kB 15.8 MB/s eta 0:00:01[K     |█                               | 51 kB 10.3 MB/s eta 0:00:01[K     |█▏                              | 61 kB 11.4 MB/s eta 0:00:01[K     |█▍                              | 71 kB 10.1 MB/s eta 0:00:01[K     |█▌                              | 81 kB 11.2 MB/s eta 0:00:01[K     |█▊                              | 92 kB 11.6 MB/s eta 0:00:01[K     |██                              | 102 kB 9.9 MB/s eta 0:00:01[K     |██                              | 112 kB 9.9 MB/s eta 0:00:01[K     |██▎                             | 122 kB 9.9 MB/s eta 0:00:01[K     |██▌                             | 133 kB 9.9 MB/s eta 0:00

In [4]:
#Dependencies
import pandas as pd
import numpy as np
import tensorflow as tf
import itertools
import matplotlib.pyplot as plt
import time

from envs import OfflineEnv
from recommender import DRRAgent

import os

# Working directory of the kernel; the dataset path below is built from it.
ROOT_DIR = os.getcwd()
# Folder from which ratings.dat, users.dat and movies.dat are read.
DATA_DIR = os.path.join(ROOT_DIR, 'ml-1m/')
# Passed to OfflineEnv and DRRAgent in the evaluation cells; presumably the
# number of items that make up one state -- TODO confirm against envs.py.
STATE_SIZE = 10

In [5]:
# Loading datasets
def _read_dat(path, encoding=None):
    """Read a '::'-delimited .dat file and return one list of string fields per line.

    The file is opened in a `with` block so the handle is closed as soon as the
    lines have been parsed (the previous inline `open(...).readlines()` calls
    never closed their files).
    """
    with open(path, 'r', encoding=encoding) as dat_file:
        return [line.strip().split("::") for line in dat_file]

ratings_list = _read_dat(os.path.join(DATA_DIR, 'ratings.dat'))
users_list = _read_dat(os.path.join(DATA_DIR, 'users.dat'))
# movies.dat is the only file read with an explicit latin-1 encoding.
movies_list = _read_dat(os.path.join(DATA_DIR, 'movies.dat'), encoding='latin-1')
# Every parsed field is a string at this point; a later cell converts ratings_df to int.
ratings_df = pd.DataFrame(ratings_list, columns = ['UserID', 'MovieID', 'Rating', 'Timestamp'], dtype = np.uint32)
movies_df = pd.DataFrame(movies_list, columns = ['MovieID', 'Title', 'Genres'])
movies_df['MovieID'] = movies_df['MovieID'].apply(pd.to_numeric)

In [6]:
# Mount Google Drive (Colab only); the model weights loaded in the evaluation
# cells are read from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Map each movie ID to the rest of its record (the Title and Genres fields).
# Keys are the ID strings exactly as split from movies.dat, not integers.
movies_id_to_movies = {movie[0]: movie[1:] for movie in movies_list}

In [8]:
# Number of records parsed from movies.dat.
len(movies_list)

3883

In [9]:
# Preview the first five rating rows.
ratings_df.head(5)

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [11]:
# # Check the distribution of the movies chosen by users
# plt.figure(figsize=(20,10))
# plt.hist(ratings_df["MovieID"], bins=3883)
# plt.show()

In [12]:
# Check for missing values: number of nulls in each column.
ratings_df.isnull().sum()

UserID       0
MovieID      0
Rating       0
Timestamp    0
dtype: int64

In [13]:
# Check the maximum value: the number of distinct user IDs is compared with the
# largest user ID (they match when the IDs run from 1 to the maximum with no gaps).
distinct_user_ids = set(ratings_df["UserID"])
largest_user_id = max(int(uid) for uid in distinct_user_ids)
print(len(distinct_user_ids) == largest_user_id)
print(largest_user_id)

True
6040


In [14]:
# Convert every column to 64-bit integers in one vectorised cast.
# This replaces `applymap(int)`, which calls Python's int() once per cell
# (four columns times about a million rows) and is deprecated in recent pandas.
ratings_df = ratings_df.astype(np.int64)

In [15]:
# One empty list per user ID; a later cell appends each user's (movie, rating)
# pairs to it in the order of the time-sorted ratings.
users_dict = {user : [] for user in set(ratings_df["UserID"])}
# Display user 1's list (still empty at this point).
users_dict[1]

[]

In [16]:
# Sort the ratings by time (ascending Timestamp) and preview the earliest rows.
ratings_df = ratings_df.sort_values(by='Timestamp', ascending=True)
ratings_df.head(5)

Unnamed: 0,UserID,MovieID,Rating,Timestamp
1000138,6040,858,4,956703932
1000153,6040,2384,4,956703954
999873,6040,593,5,956703954
1000007,6040,1961,4,956703977
1000192,6040,2019,5,956703977


In [17]:
# Put (movie, rating) pairs into the per-user dictionaries, following the current
# row order of ratings_df.
# users_dict receives every rating; users_dict_for_history_len receives only the
# ratings of 4 or higher, so each user's history length counts just those movies.
#
# Both dictionaries are rebuilt from scratch here so that re-running this cell
# does not append duplicate entries to lists created by an earlier execution.
users_dict = {user : [] for user in set(ratings_df["UserID"])}
users_dict_for_history_len = {user : [] for user in set(ratings_df["UserID"])}
# Walking the three needed columns as one NumPy array avoids building a pandas
# Series for every row, which is what iterrows() does.
for uid, movie_id, rating in ratings_df[['UserID', 'MovieID', 'Rating']].to_numpy():
    users_dict[uid].append((movie_id, rating))
    if rating >= 4:
        users_dict_for_history_len[uid].append((movie_id, rating))

In [18]:
# Movie history length of each user (number of ratings of 4 or higher).
# The IDs are sorted explicitly: later cells slice this list by position
# (`[:train_users_num]`, `[-eval_users_num:]`) alongside ascending user-ID ranges,
# and the iteration order of a set is not guaranteed.
users_history_lens = [len(users_dict_for_history_len[u]) for u in sorted(set(ratings_df["UserID"]))]

In [19]:
# Number of entries in users_history_lens (one per distinct user ID).
len(users_history_lens)

6040

In [20]:
# Display the (movie, rating) pairs collected for user 1.
users_dict[1]

[(3186, 4),
 (1721, 4),
 (1270, 5),
 (1022, 5),
 (2340, 3),
 (1836, 5),
 (3408, 4),
 (1207, 4),
 (2804, 5),
 (260, 4),
 (720, 3),
 (1193, 5),
 (919, 4),
 (608, 4),
 (2692, 4),
 (1961, 5),
 (2028, 5),
 (3105, 5),
 (938, 4),
 (1035, 5),
 (1962, 4),
 (1028, 5),
 (2018, 4),
 (150, 5),
 (1097, 4),
 (914, 3),
 (1287, 5),
 (2797, 4),
 (1246, 4),
 (2762, 4),
 (661, 3),
 (2918, 4),
 (531, 4),
 (3114, 4),
 (2791, 4),
 (1029, 5),
 (2321, 3),
 (1197, 3),
 (594, 4),
 (2398, 4),
 (1545, 4),
 (527, 5),
 (745, 3),
 (595, 5),
 (588, 4),
 (1, 5),
 (2687, 3),
 (783, 4),
 (2294, 4),
 (2355, 5),
 (1907, 4),
 (1566, 4),
 (48, 5)]

In [21]:
# Write the per-user rating lists and the history lengths to .npy files under ./data.
# NOTE(review): "histroy" in the second file name looks like a typo of "history";
# it is left untouched because other code may load the file by this exact name -- confirm before renaming.
np.save("./data/user_dict.npy", users_dict)
np.save("./data/users_histroy_len.npy", users_history_lens)

In [22]:
# One more than the largest user / movie ID found in the ratings.
users_num = int(ratings_df["UserID"].max()) + 1
items_num = int(ratings_df["MovieID"].max()) + 1

In [23]:
# Show the two sizes computed above.
print(users_num, items_num)

6041 3953


### Training setting

In [24]:
# Training split size: 80% of users_num, truncated to an integer; all items are kept.
train_users_num = int(users_num * 0.8)
train_items_num = items_num
print(train_users_num, train_items_num)

4832 3953


In [25]:
# Training users are the IDs 1..train_users_num; their history lengths are the
# first train_users_num entries of users_history_lens.
train_user_ids = range(1, train_users_num + 1)
train_users_dict = {uid: users_dict[uid] for uid in train_user_ids}
train_users_history_lens = users_history_lens[:train_users_num]
print(len(train_users_dict), len(train_users_history_lens))

4832 4832


### Evaluation setting

In [26]:
# Evaluation split size: 20% of users_num, truncated to an integer; all items are kept.
eval_users_num = int(users_num * 0.2)
eval_items_num = items_num
print(eval_users_num, eval_items_num)

1208 3953


In [27]:
# Evaluation users are the last eval_users_num IDs below users_num; their history
# lengths are the last eval_users_num entries of users_history_lens.
eval_user_ids = range(users_num - eval_users_num, users_num)
eval_users_dict = {uid: users_dict[uid] for uid in eval_user_ids}
eval_users_history_lens = users_history_lens[-eval_users_num:]
print(len(eval_users_dict), len(eval_users_history_lens))

1208 1208


### Ready
users_dict, users_history_lens, movies_id_to_movies, users_num, items_num

### Evaluation

In [29]:
def evaluate(recommender, env, check_movies = False, top_k=False):
    """Evaluate `recommender` on `env` and return `(precision, ndcg)`.

    The loop body ends with an unconditional `break`, so at most ONE
    recommendation step is evaluated per call; the returned values are the
    metrics of that single step.

    Args:
        recommender: agent exposing `embedding_network.get_layer(name)`,
            `srm_ave`, `actor.network` and `recommend_item(action, items, top_k=...)`.
        env: environment exposing `reset()`, `step(item, top_k=...)`,
            `recommended_items`, `user_items` and `get_items_names(ids)`.
        check_movies: when true, print the items and per-step metrics.
        top_k: falsy for a single recommendation, or the integer K of a top-K
            recommendation. Precision and NDCG are only accumulated when it is
            truthy; otherwise both returned values are 0.

    Returns:
        Tuple `(mean_precision, mean_ndcg)` averaged over the evaluated steps,
        or `(0.0, 0.0)` when the environment is already done right after
        `reset()` (previously this case raised ZeroDivisionError).
    """
    # Reset the episodic accumulators
    episode_reward = 0
    steps = 0
    mean_precision = 0
    mean_ndcg = 0
    # Reset the environment
    user_id, items_ids, done = env.reset()
    if check_movies:
        print(f'user_id : {user_id}, rated_items_length:{len(env.user_items)}')
        print('items : \n', np.array(env.get_items_names(items_ids)))

    while not done:

        # Observe current state & Find action
        ## Embedding
        user_eb = recommender.embedding_network.get_layer('user_embedding')(np.array(user_id))
        items_eb = recommender.embedding_network.get_layer('movie_embedding')(np.array(items_ids))
        ## State produced by the SRM from the two embeddings
        state = recommender.srm_ave([np.expand_dims(user_eb, axis=0), np.expand_dims(items_eb, axis=0)])
        ## Action (ranking score)
        action = recommender.actor.network(state)
        ## Item
        recommended_item = recommender.recommend_item(action, env.recommended_items, top_k=top_k)
        if check_movies:
            print(f'recommended items ids : {recommended_item}')
            print(f'recommened items : \n {np.array(env.get_items_names(recommended_item), dtype=object)}')
        # Calculate reward & observe new state (in env)
        ## Step
        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)
        if top_k:
            correct_list = [1 if r > 0 else 0 for r in reward]
            # ndcg (guarded: idcg is 0 when no rewards came back)
            dcg, idcg = calculate_ndcg(correct_list, [1 for _ in range(len(reward))])
            step_ndcg = dcg/idcg if idcg else 0.0
            mean_ndcg += step_ndcg

            # precision: count the hits directly. `top_k - count(0)` would treat
            # missing slots as hits whenever fewer than top_k rewards are returned.
            correct_num = sum(correct_list)
            mean_precision += correct_num/top_k

        reward = np.sum(reward)
        items_ids = next_items_ids
        episode_reward += reward
        steps += 1

        if check_movies:
            if top_k:
                print(f'precision : {correct_num/top_k}, dcg : {dcg:0.3f}, idcg : {idcg:0.3f}, ndcg : {step_ndcg:0.3f}, reward : {reward}')
            else:
                # The ranking metrics only exist in top-K mode; printing them
                # here used to raise NameError.
                print(f'reward : {reward}')
            print()
        # Only the first step is evaluated.
        break

    if steps == 0:
        # The environment was already done after reset(): nothing to average.
        return 0.0, 0.0

    if check_movies:
        print(f'precision : {mean_precision/steps}, ngcg : {mean_ndcg/steps}, episode_reward : {episode_reward}')
        print()

    return mean_precision/steps, mean_ndcg/steps

def calculate_ndcg(rel, irel):
    """Return the pair `(dcg, idcg)` for a ranked list.

    `rel` is binarised first (any value > 0 counts as 1, everything else as 0);
    `irel` is used as given. Position `i` (0-based) is discounted by
    `log2(i + 2)`. Only positions present in both lists contribute.
    """
    hits = [1 if score > 0 else 0 for score in rel]
    ranked_pairs = list(zip(hits, irel))
    dcg = sum(hit / np.log2(pos + 2) for pos, (hit, _) in enumerate(ranked_pairs))
    idcg = sum(ideal / np.log2(pos + 2) for pos, (_, ideal) in enumerate(ranked_pairs))
    return dcg, idcg

In [30]:
# Set the Keras backend's default float type to float64 before the networks are built below.
tf.keras.backend.set_floatx('float64')

In [31]:
# Evaluate the saved actor/critic weights on every evaluation user and report
# the mean precision@K and NDCG@K.
TOP_K = 10
actor_weights_path = '/content/drive/MyDrive/RL_Reco/save_weights/actor_3000_fixed.h5'
critic_weights_path = '/content/drive/MyDrive/RL_Reco/save_weights/critic_3000_fixed.h5'
sum_precision, sum_ndcg = 0, 0

for user_id in eval_users_dict:
    # A new environment pinned to this user, and a new agent bound to it.
    env = OfflineEnv(eval_users_dict, users_history_lens, movies_id_to_movies, STATE_SIZE, fix_user_id=user_id)
    recommender = DRRAgent(env, users_num, items_num, STATE_SIZE)
    recommender.actor.build_networks()
    recommender.critic.build_networks()
    recommender.load_model(actor_weights_path, critic_weights_path)
    precision, ndcg = evaluate(recommender, env, top_k=TOP_K)
    sum_precision = sum_precision + precision
    sum_ndcg = sum_ndcg + ndcg

eval_user_count = len(eval_users_dict)
print(f'precision@{TOP_K} : {sum_precision/eval_user_count}, ndcg@{TOP_K} : {sum_ndcg/eval_user_count}')

precision@10 : 0.39817880794702076, ndcg@10 : 0.3805991142275761
precision@10 : 0.39817880794702076, ndcg@10 : 0.3805991142275761


In [36]:
# Evaluate the saved actor/critic weights on every evaluation user and report
# the mean precision@K and NDCG@K.
sum_precision = 0
sum_ndcg = 0
TOP_K = 10

for user_id in eval_users_dict.keys():
    # A new environment pinned to this user, and a new agent bound to it.
    env = OfflineEnv(eval_users_dict, users_history_lens, movies_id_to_movies, STATE_SIZE, fix_user_id=user_id)
    recommender = DRRAgent(env, users_num, items_num, STATE_SIZE)
    recommender.actor.build_networks()
    recommender.critic.build_networks()
    recommender.load_model('/content/drive/MyDrive/RL_Reco/save_weights/actor_3000_fixed.h5', 
                           '/content/drive/MyDrive/RL_Reco/save_weights/critic_3000_fixed.h5')
    # Use TOP_K rather than a literal 10 so the K that is evaluated always
    # matches the K printed in the report line below.
    precision, ndcg = evaluate(recommender, env, top_k=TOP_K)
    sum_precision += precision
    sum_ndcg += ndcg
    
print(f'precision@{TOP_K} : {sum_precision/len(eval_users_dict)}, ndcg@{TOP_K} : {sum_ndcg/len(eval_users_dict)}')

precision@10 : 0.39875827814569614, ndcg@10 : 0.38257738152305487
