In [None]:
# Run on Google Colab only: mount Google Drive and switch into the project folder.
# Outside Colab the `google.colab` package does not exist, so this cell is skipped
# instead of aborting a "Run All" with an ImportError.
import os

try:
    from google.colab import drive
except ImportError:
    print("google.colab not available - skipping Google Drive mount")
else:
    drive.mount('/content/drive')
    root_path = '/content/drive/MyDrive/DRL-based-Recommendation'

    # The relative paths used later (dataset/, trained/, results/) resolve against this folder.
    os.chdir(root_path)

In [1]:
from model import Actor, Critic, DRRAveStateRepresentation, PMF
from learn import DRRTrainer
from utils.general import csv_plot
import torch
import pickle
import numpy as np
import random
import os
import datetime

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Central configuration for this notebook: output/checkpoint paths plus hyperparameters.
# The class is never instantiated; its attributes are read directly (e.g. config.train_ratio)
# and the class object itself is handed to DRRTrainer.
# NOTE: documented with `#` comments instead of a docstring on purpose - a later cell writes
# config.__dict__ to hyperparams.txt, and a docstring would show up in that dump.
class config():
    # Results of a run go into a per-day folder: results/<yymmdd>/
    output_path = 'results/' + datetime.datetime.now().strftime('%y%m%d') + '/'
    # Runs when the class body is executed; exist_ok avoids the check-then-create race
    os.makedirs(output_path, exist_ok=True)
    plot_dir = output_path + 'rewards.pdf'

    # Raw training curves (.npy)
    train_actor_loss_data_dir = output_path + 'train_actor_loss_data.npy'
    train_critic_loss_data_dir = output_path + 'train_critic_loss_data.npy'
    train_mean_reward_data_dir = output_path + 'train_mean_reward_data.npy'

    # Plots of the training curves (.png)
    train_actor_loss_plot_dir = output_path + 'train_actor_loss.png'
    train_critic_loss_plot_dir = output_path + 'train_critic_loss.png'
    train_mean_reward_plot_dir = output_path + 'train_mean_reward.png'

    # Folder holding previously trained weights
    trained_models_dir = 'trained/'

    # Checkpoints to read from (a later cell re-points these at a results folder)
    actor_model_trained = trained_models_dir + 'actor_net.weights'
    critic_model_trained = trained_models_dir + 'critic_net.weights'
    state_rep_model_trained = trained_models_dir + 'state_rep_net.weights'

    # Checkpoint paths inside this run's output folder
    actor_model_dir = output_path + 'actor_net.weights'
    critic_model_dir = output_path + 'critic_net.weights'
    state_rep_model_dir = output_path + 'state_rep_net.weights'

    csv_dir = output_path + 'log.csv'

    # Weights of the pre-trained PMF model, loaded into the reward function
    path_to_trained_pmf = trained_models_dir + 'trained_pmf.pt'

    # hyperparams
    # Unless noted otherwise these are consumed by DRRTrainer, whose code is not part of
    # this notebook - check learn.py for their exact meaning.
    batch_size = 64
    gamma = 0.9
    replay_buffer_size = 100000
    history_buffer_size = 5
    learning_start = 5000
    learning_freq = 1
    lr_state_rep = 0.001
    lr_actor = 0.0001
    lr_critic = 0.001
    eps_start = 1
    eps = 0.1
    eps_steps = 10000
    eps_eval = 0.1
    tau = 0.01 # initially 0.001
    beta = 0.4
    prob_alpha = 0.3
    max_timesteps_train = 100000
    max_epochs_offline = 1000
    max_timesteps_online = 10000
    embedding_feature_size = 100  # also passed to PMF(...) when the reward model is built
    episode_length = 10
    train_ratio = 0.8  # fraction of the shuffled rows used as training data
    weight_decay = 0.01
    clip_val = 1.0
    log_freq = 100
    saving_freq = 1000
    zero_reward = False
    enable_cuda = False  # CUDA is used only if this is True and a device is available

def seed_all(cuda, seed):
    """Seed every random number generator this notebook relies on.

    Args:
        cuda: when truthy, additionally empty the CUDA cache and seed the CUDA generator.
        seed: seed value passed to Python's `random`, NumPy and PyTorch.
    """
    # Same order as always: Python stdlib, then NumPy, then PyTorch
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not cuda:
        return

    torch.cuda.empty_cache()
    torch.cuda.manual_seed(seed=seed)

In [3]:
# Use CUDA only when it is both enabled in the config and actually available
cuda = bool(config.enable_cuda and torch.cuda.is_available())
print("Using CUDA" if cuda else "Using CPU")

# Init seeds
seed_all(cuda, 0)

# Grab models
actor_function = Actor
critic_function = Critic
state_rep_function = DRRAveStateRepresentation

# Import Data
# NOTE: pickle.load can execute arbitrary code - only load these files from a trusted source.
# `with` closes each file handle as soon as its content has been read.
with open('dataset/user_id_to_num.pkl', 'rb') as users_file:
    users = pickle.load(users_file)
with open('dataset/item_id_to_num.pkl', 'rb') as items_file:
    items = pickle.load(items_file)
data = np.load('dataset/data.npy')

# Map column 0 to a reward in {-1, 0, 1}: values 1 or 2 -> -1, value 3 -> 0, anything else -> 1
data[:, 0] = np.where(np.isin(data[:, 0], [1, 2]), -1,
                      np.where(data[:, 0] == 3, 0, 1))
print(data)

# Shuffle the rows in place (uses NumPy's global RNG, seeded above), then split train/test
np.random.shuffle(data)
n_train = int(config.train_ratio * data.shape[0])
train_data = torch.from_numpy(data[:n_train])
test_data = torch.from_numpy(data[n_train:])

# Create and load PMF function for rewards and embeddings
n_users = len(users)
n_items = len(items)
reward_function = PMF(n_users, n_items, config.embedding_feature_size, is_sparse=False, enable_cuda=cuda)
# NOTE(review): the recorded output shows a warning pointing at this torch.load call;
# consider passing map_location / weights_only explicitly - confirm against the installed torch.
reward_function.load_state_dict(torch.load(config.path_to_trained_pmf))

# Freeze all the parameters in the network
for param in reward_function.parameters():
    param.requires_grad = False
print("Initialized PMF, imported weights, created reward_function")

# Extract embeddings
user_embeddings = reward_function.user_embeddings.weight.data
item_embeddings = reward_function.item_embeddings.weight.data
print("Extracted user and item embeddings from PMF")
print("User embeddings shape: ", user_embeddings.shape)
print("Item embeddings shape: ", item_embeddings.shape)

# Init trainer
print("Initializing DRRTrainer -------------------------------------------------------------------------------")
trainer = DRRTrainer(config,
                      actor_function,
                      critic_function,
                      state_rep_function,
                      reward_function,
                      users,
                      items,
                      train_data,
                      test_data,
                      user_embeddings,
                      item_embeddings,
                      cuda
                      )

Using CPU
[[        1 978300760         0      1176]
 [        0 978302109         0       655]
 [        0 978301968         0       902]
 ...
 [        1 956704746      6039       558]
 [        1 956715648      6039      1080]
 [        1 956715569      6039      1081]]


  reward_function.load_state_dict(torch.load(config.path_to_trained_pmf))


Initialized PMF, imported weights, created reward_function
Extracted user and item embeddings from PMF
User embeddings shape:  torch.Size([6040, 100])
Item embeddings shape:  torch.Size([3883, 100])
Initializing DRRTrainer -------------------------------------------------------------------------------
Current PyTorch Device:  cpu
Data dimensions extracted
Models initialized
Model weights initialized, copied to target
Optimizers initialized


In [4]:
%%time
# Train
# %%time has to stay on the first line of the cell; it reports how long the cell took.
# trainer.learn() returns three values, unpacked here as the actor losses, the critic
# losses and the episode-average rewards.
actor_losses, critic_losses, epi_avg_rewards = trainer.learn()

  ignored_items.append(torch.tensor(rec_item_idx).to(self.device))


Timestep 100 | Episode 9 | Mean Ep R 0.7043 | Max R 0.7043 | Critic Params Norm 0.2281 | Actor Loss -1.5070 | Critic Loss 0.0759 | 
Timestep 200 | Episode 19 | Mean Ep R 0.5441 | Max R 0.5441 | Critic Params Norm 0.2815 | Actor Loss -2.3864 | Critic Loss 0.1259 | 
Timestep 300 | Episode 29 | Mean Ep R 0.0873 | Max R 0.0873 | Critic Params Norm 0.7779 | Actor Loss -2.6028 | Critic Loss 0.1050 | 
Timestep 400 | Episode 39 | Mean Ep R 0.6597 | Max R 0.6597 | Critic Params Norm 0.1759 | Actor Loss -3.3949 | Critic Loss 0.0619 | 
Timestep 500 | Episode 49 | Mean Ep R 0.5815 | Max R 0.5815 | Critic Params Norm 0.2993 | Actor Loss -4.0478 | Critic Loss 0.1206 | 
Timestep 600 | Episode 59 | Mean Ep R 0.7850 | Max R 0.7850 | Critic Params Norm 0.9219 | Actor Loss -4.1906 | Critic Loss 0.1256 | 
Timestep 700 | Episode 69 | Mean Ep R -1.1879 | Max R -1.1879 | Critic Params Norm 0.5216 | Actor Loss -4.7983 | Critic Loss 0.1468 | 
Timestep 800 | Episode 79 | Mean Ep R 0.7534 | Max R 0.7534 | Critic

In [None]:
# Choose which run the plotting / evaluation cells below operate on.
# To analyse the run produced in this session instead, use config.output_path here.
config.trained_models_dir = output_path = "results/241206/"

# Saved training curves of that run
train_actor_loss_data_dir = f"{output_path}train_actor_loss_data.npy"
train_critic_loss_data_dir = f"{output_path}train_critic_loss_data.npy"
train_mean_reward_data_dir = f"{output_path}train_mean_reward_data.npy"

# Re-point the checkpoint paths at the weights stored in the same folder
config.actor_model_trained = f"{config.trained_models_dir}actor_net.weights"
config.critic_model_trained = f"{config.trained_models_dir}critic_net.weights"
config.state_rep_model_trained = f"{config.trained_models_dir}state_rep_net.weights"

In [6]:
!pip install tsmoothie

def noiseless_plot(y, title, ylabel, save_loc, window_len=1000, n_sigma=3):
    """Save a figure showing a noisy series together with its smoothed version.

    The raw series is drawn in orange, the convolution-smoothed series in blue and
    the sigma interval around it as a shaded band. The figure is written to
    `save_loc` and then closed; nothing is returned.

    Args:
        y: series to smooth, passed as-is to `ConvolutionSmoother.smooth`.
        title: figure title.
        ylabel: label of the y axis.
        save_loc: path the figure is saved to.
        window_len: length of the smoothing window (default 1000, the previous fixed value).
        n_sigma: width of the shaded band in sigmas (default 3, the previous fixed value).
    """
    # Imported here so the function does not depend on a star-import run in a later cell.
    from tsmoothie.smoother import ConvolutionSmoother

    # operate smoothing
    smoother = ConvolutionSmoother(window_len=window_len, window_type='ones')
    smoother.smooth(y)

    # generate intervals
    low, up = smoother.get_intervals('sigma_interval', n_sigma=n_sigma)

    # plot the smoothed timeseries with intervals
    plt.close()  # as before: discard whatever figure is current before drawing
    fig, ax = plt.subplots(figsize=(11, 6))
    try:
        ax.set_xlabel("Epoch")
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.plot(smoother.data[0], color='orange')
        ax.plot(smoother.smooth_data[0], linewidth=3, color='blue')
        ax.fill_between(range(len(smoother.data[0])), low[0], up[0], alpha=0.3)
        fig.savefig(save_loc)
    finally:
        # Release the figure even if plotting or saving failed
        plt.close(fig)


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [7]:
import matplotlib.pyplot as plt
from tsmoothie.smoother import *

# Reload the saved training curves from disk (replaces any in-memory values from training)
actor_losses = np.load(train_actor_loss_data_dir)
critic_losses = np.load(train_critic_loss_data_dir)
epi_avg_rewards = np.load(train_mean_reward_data_dir)

# One smoothed figure per curve; title and y-label are deliberately the same text
noiseless_plot(
    y=actor_losses,
    title="Actor Loss (Train)",
    ylabel="Actor Loss (Train)",
    save_loc=output_path + "train_actor_loss_smooth.png",
)

noiseless_plot(
    y=critic_losses,
    title="Critic Loss (Train)",
    ylabel="Critic Loss (Train)",
    save_loc=output_path + "train_critic_loss_smooth.png",
)

noiseless_plot(
    y=epi_avg_rewards,
    title="Mean Reward (Train)",
    ylabel="Mean Reward (Train)",
    save_loc=output_path + "train_mean_reward_smooth.png",
)

In [8]:
# Write the namespace of the config class (paths and hyperparameters) next to the results.
# `with` guarantees the file is closed even if printing fails.
with open(output_path + "hyperparams.txt", 'w') as sourceFile:
    print(config.__dict__, file=sourceFile)

In [9]:
# Offline evaluate
# For every k, run the offline evaluation 10 times, store the per-run averages as .npy
# and write the mean over the runs to a small text file per metric.
list_top_k = [5, 10]

print("Start DRR")
for top_k in list_top_k:
    precisions = []
    ndcgs = []
    for i in range(10):
        print(f"DRR - Time #{i}:")
        # Evaluate
        avg_precision, avg_ndcg = trainer.offline_evaluate(top_k)

        # Append to list
        precisions.append(avg_precision)
        ndcgs.append(avg_ndcg)

    # Save data
    precisions = np.array(precisions)
    np.save(output_path + f'avg_precision@{top_k}_offline_eval.npy', precisions)
    ndcgs = np.array(ndcgs)
    np.save(output_path + f'avg_ndcg@{top_k}_offline_eval.npy', ndcgs)

    # Save the summaries. Each metric goes into the file named after it (the two
    # file names used to be swapped), and `with` closes the handles reliably.
    with open(output_path + f'avg_precision@{top_k}_offline_eval.txt', 'w') as sourceFile:
        print(f'Average Precision@{top_k} (Eval): {np.mean(precisions)}', file=sourceFile)
    with open(output_path + f'avg_ndcg@{top_k}_offline_eval.txt', 'w') as sourceFile:
        print(f'Average NDCG@{top_k} (Eval): {np.mean(ndcgs)}', file=sourceFile)

Start DRR
DRR - Time #0:
Episode 20 | Precision@5 1.0 | Avg Precision@5 0.8000 | NDCG@5 1.0 | Avg NDCG@5 0.8988 | 
Episode 40 | Precision@5 0.6 | Avg Precision@5 0.7750 | NDCG@5 0.7328286170959473 | Avg NDCG@5 0.8798 | 
Episode 60 | Precision@5 0.8 | Avg Precision@5 0.7700 | NDCG@5 0.904717206954956 | Avg NDCG@5 0.8958 | 
Episode 80 | Precision@5 0.6 | Avg Precision@5 0.7650 | NDCG@5 0.9060254693031311 | Avg NDCG@5 0.8980 | 
Episode 100 | Precision@5 0.8 | Avg Precision@5 0.7620 | NDCG@5 0.7606395483016968 | Avg NDCG@5 0.8971 | 


  self.state_rep_net.load_state_dict(torch.load(self.config.state_rep_model_trained))
  self.actor_net.load_state_dict(torch.load(self.config.actor_model_trained))
  self.critic_net.load_state_dict(torch.load(self.config.critic_model_trained))


Episode 120 | Precision@5 0.8 | Avg Precision@5 0.7667 | NDCG@5 1.0 | Avg NDCG@5 0.9037 | 
Episode 140 | Precision@5 0.8 | Avg Precision@5 0.7671 | NDCG@5 1.0 | Avg NDCG@5 0.9039 | 
Episode 160 | Precision@5 1.0 | Avg Precision@5 0.7625 | NDCG@5 1.0 | Avg NDCG@5 0.9046 | 
Episode 180 | Precision@5 0.6 | Avg Precision@5 0.7667 | NDCG@5 0.9060254693031311 | Avg NDCG@5 0.9032 | 
Episode 200 | Precision@5 0.8 | Avg Precision@5 0.7640 | NDCG@5 1.0 | Avg NDCG@5 0.9065 | 
Episode 220 | Precision@5 1.0 | Avg Precision@5 0.7682 | NDCG@5 1.0 | Avg NDCG@5 0.9081 | 
Episode 240 | Precision@5 0.8 | Avg Precision@5 0.7742 | NDCG@5 0.904717206954956 | Avg NDCG@5 0.9100 | 
Episode 260 | Precision@5 0.6 | Avg Precision@5 0.7723 | NDCG@5 0.9060254693031311 | Avg NDCG@5 0.9077 | 
Episode 280 | Precision@5 1.0 | Avg Precision@5 0.7771 | NDCG@5 1.0 | Avg NDCG@5 0.9090 | 
Episode 300 | Precision@5 0.6 | Avg Precision@5 0.7800 | NDCG@5 0.6182885766029358 | Avg NDCG@5 0.9102 | 
Episode 320 | Precision@5 0.8 |

In [None]:
# Read back the per-run metrics saved by the offline evaluation and print their means
list_top_k = [5, 10]
for top_k in list_top_k:
    # per-run averages stored as .npy in the selected results folder
    precisions = np.load(f'{output_path}avg_precision@{top_k}_offline_eval.npy')
    ndcgs = np.load(f'{output_path}avg_ndcg@{top_k}_offline_eval.npy')

    # mean over all evaluation runs
    avg_precision, avg_ndcg = np.mean(precisions), np.mean(ndcgs)

    # one line per entry, same text as three separate prints
    print(
        f"Top@{top_k} Results:",
        f"Average Precision@{top_k}: {avg_precision}",
        f"Average NDCG@{top_k}: {avg_ndcg}",
        sep="\n",
    )