# DataLab Cup 4: Recommender Systems

In [None]:
import os
import random
import csv
import ast
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import tqdm
from collections import defaultdict

from evaluation.environment import TrainingEnvironment, TestingEnvironment
from queue import Queue
import pickle

In [None]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be configured the same way on every physical GPU
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        # Expose only the first physical GPU (index 0) to TensorFlow
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Memory growth can only be changed before the GPUs are initialized
        print(err)

## Hyper Parameters

In [None]:
# Official hyperparameters for this competition (do not modify)
N_TRAIN_USERS = 1000  # number of user ids covered by the logged click data (user_item_info)
N_TEST_USERS = 2000   # number of user ids in df_user; also the size of the user embedding table
N_ITEMS = 209527      # size of the item embedding table
HORIZON = 2000        # presumably the max number of steps per user in the environment -- TODO confirm
TEST_EPISODES = 5     # number of testing episodes
SLATE_SIZE = 5        # number of items recommended per step


# parameters
MODEL_PATH = "./model"  # target directory of FunkSVDRecommender.save_model / load_model
CKP_DIR = "./checkpoints/FunkSVD_CF_emb256_lr1e-4"  # directory managed by tf.train.CheckpointManager

# FunkSVD hyperparameters
EMBEDDING_SIZE = 256     # length of each user/item embedding vector
BATCH_SIZE = 128         # batch size of the tf.data training/validation pipelines
INITIAL_N_EPOCHS = 100   # last epoch index of the initial (offline) training loop
N_EPOCHS = 50            # epochs per online update (only used by the commented-out training cell)
LEARNING_RATE = 1e-4     # Adam learning rate
TRAINING_EPISODES = 500  # episodes of the commented-out training-environment loop


# Q-learning
# NOTE(review): the Q-learning constants below are not referenced by the FunkSVD pipeline
# visible in this notebook -- confirm whether they are still needed.
# agent
MIN_EXPLORING_RATE = 0.01
MIN_LEARNING_RATE = 0.5

# training
NUM_EPISODE = 40000
PRINT_EVERY_EPISODE = 500

## Datasets

In [None]:
# Dataset paths (all relative to the notebook's working directory)
USER_DATA = os.path.join('dataset', 'user_data.json')  # JSON Lines, read into df_user
ITEM_DATA = os.path.join('dataset', 'item_data.json')  # JSON Lines, read into df_item
USER_ITEM_SIMILARITY = os.path.join('dataset', 'user_item_similarity.pkl')  # pickled user x item similarity table
INTERACT_DATA = os.path.join('dataset', 'organized_interact_data.pkl')  # pickled (user_id, item_id, rating) table
# NOTE(review): USER_RECOMMENDATION is only mentioned in commented-out code in this notebook.
USER_RECOMMENDATION = os.path.join('dataset', 'user_recommendation_sim_based.pkl')

# Output file path (the 'output' directory must exist before the result CSV is written)
OUTPUT_PATH = os.path.join('output', 'output.csv')

## User Data

In [None]:
# Load the user records (JSON Lines: one JSON object per line) and display them.
# Later cells read the "user_id" and "history" columns of this frame.
df_user = pd.read_json(USER_DATA, lines=True)
df_user

## Item Data

In [6]:
# Load the item records (JSON Lines: one JSON object per line) and display them.
df_item = pd.read_json(ITEM_DATA, lines=True)
df_item

Unnamed: 0,item_id,headline,short_description
0,0,Over 4 Million Americans Roll Up Sleeves For O...,Health experts said it is too early to predict...
1,1,"American Airlines Flyer Charged, Banned For Li...",He was subdued by passengers and crew when he ...
2,2,23 Of The Funniest Tweets About Cats And Dogs ...,"""Until you have a dog you don't understand wha..."
3,3,The Funniest Tweets From Parents This Week (Se...,"""Accidentally put grown-up toothpaste on my to..."
4,4,Woman Who Called Cops On Black Bird-Watcher Lo...,Amy Cooper accused investment firm Franklin Te...
...,...,...,...
209522,209522,RIM CEO Thorsten Heins' 'Significant' Plans Fo...,Verizon Wireless and AT&T are already promotin...
209523,209523,Maria Sharapova Stunned By Victoria Azarenka I...,"Afterward, Azarenka, more effusive with the pr..."
209524,209524,"Giants Over Patriots, Jets Over Colts Among M...","Leading up to Super Bowl XLVI, the most talked..."
209525,209525,Aldon Smith Arrested: 49ers Linebacker Busted ...,CORRECTION: An earlier version of this story i...


In [7]:
# Load the precomputed user x item similarity table and display it.
# NOTE(review): read_pickle can execute arbitrary code -- only load files you produced yourself.
# NOTE(review): this frame has one column per item and is not referenced again in the visible
# notebook -- consider skipping the load to save memory.
df_user_item_sim = pd.read_pickle(USER_ITEM_SIMILARITY)
df_user_item_sim

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,209517,209518,209519,209520,209521,209522,209523,209524,209525,209526
0,0.062287,0.003323,0.261205,0.274883,0.064868,0.013853,0.226747,0.061278,0.196566,0.109174,...,0.043999,0.059647,0.192302,0.062932,0.180494,0.012018,0.048684,0.079522,-0.015544,0.075266
1,0.221764,0.055316,0.112049,0.190925,0.091024,0.078525,0.017141,0.173482,0.051898,0.018566,...,-0.004871,-0.035946,0.117543,-0.005578,0.029166,0.013565,0.055716,0.008542,0.073230,0.038990
2,0.106126,0.022207,0.117865,0.251699,-0.012615,-0.014647,0.126634,0.021103,0.089571,0.015671,...,0.036220,0.084124,-0.012249,0.043811,0.079477,-0.061269,0.037820,0.004233,0.000226,0.029401
3,0.093782,0.016448,0.107896,0.075283,0.067360,-0.035875,0.099989,0.168689,0.128790,0.098932,...,0.056915,0.040844,0.170552,0.072513,0.178354,0.060635,0.035779,0.203117,0.084404,0.080729
4,0.122536,0.071247,0.153996,0.225814,0.183863,0.045011,0.210932,0.074886,0.209297,0.033176,...,0.063099,0.134406,0.060474,0.110714,0.092821,0.012666,0.129592,0.059556,0.067587,0.003772
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.100583,0.015665,0.170305,0.348556,0.004256,0.037536,0.147794,0.190394,0.262166,0.082865,...,0.060153,0.081491,0.119582,0.026745,0.165972,0.012540,0.067981,0.044060,-0.026443,-0.015400
1996,0.091208,0.042278,0.095109,0.068298,0.088406,0.037034,0.087014,0.329571,0.112063,0.101930,...,-0.045567,-0.018176,0.064000,0.040877,0.201333,0.011241,0.077976,0.176499,0.095174,0.040855
1997,0.084990,0.107297,0.142880,0.165294,0.093537,0.032139,0.144430,0.103643,0.136623,0.098574,...,0.043109,-0.038939,0.077905,0.010034,0.133854,0.056863,0.035067,0.070287,0.059929,0.019807
1998,0.170120,0.071936,0.222804,0.198526,0.098436,-0.000620,0.210958,0.153454,0.097726,0.120007,...,0.026999,0.008165,0.022288,0.071645,0.013865,0.025776,0.133728,0.029238,0.022635,0.013028


In [8]:
# Load the organized interaction table and display only the rows whose rating equals 5.
# NOTE(review): read_pickle can execute arbitrary code -- only load files you produced yourself.
df_interact_data = pd.read_pickle(INTERACT_DATA)
df_interact_data[df_interact_data["rating"] == 5]

Unnamed: 0,user_id,item_id,rating
0,0,12126,5
1,0,9810,5
4,0,9102,5
8,0,15075,5
101,0,64266,5
...,...,...,...
795551,999,202879,5
795759,999,175578,5
795765,999,166140,5
795821,999,153337,5


## FunkSVD

In [9]:
class FunkSVDRecommender(tf.keras.Model):
    '''
    Simplified Funk-SVD recommender model.

    The training/validation forward pass predicts a rating as
        dot(P[user], Q[item]) + B_user[user] + B_item[item].
    The global mean `bias_mu` is stored as `B_mu` but is not read by any method
    of this class, and the ranking methods score items with the dot product only
    (the bias terms are not added there).
    '''

    def __init__(self, m_users: int, n_items: int, embedding_size: int, learning_rate: float, bias_mu):
        '''
        Constructor of the model

        m_users: number of rows of the user embedding table P
        n_items: number of rows of the item embedding table Q
        embedding_size: length of each embedding vector
        learning_rate: learning rate of the Adam optimizer
        bias_mu: global rating mean, kept as the constant B_mu
        '''
        super().__init__()
        self.m = m_users
        self.n = n_items
        self.k = embedding_size
        self.lr = learning_rate
        self.B_mu = tf.constant([bias_mu])

        # user embeddings P
        self.P = tf.Variable(tf.keras.initializers.RandomNormal()(shape=(self.m, self.k)))

        # item embeddings Q
        self.Q = tf.Variable(tf.keras.initializers.RandomNormal()(shape=(self.n, self.k)))

        # per-user and per-item bias terms
        self.B_user = tf.Variable(tf.keras.initializers.RandomNormal()(shape=(self.m, 1)))
        self.B_item = tf.Variable(tf.keras.initializers.RandomNormal()(shape=(self.n, 1)))

        # optimizer
        self.optimizer = tf.optimizers.Adam(learning_rate=self.lr)

    @tf.function
    def call(self, user_ids: tf.Tensor, item_ids: tf.Tensor) -> tf.Tensor:
        '''
        Forward pass used in training and validating

        user_ids, item_ids: 1-D integer tensors of equal length (one entry per interaction)
        Returns one predicted rating per (user, item) pair.
        '''
        # dot product of the user and item embeddings of each observed interaction pair
        user_vecs = tf.gather(self.P, indices=user_ids)
        item_vecs = tf.gather(self.Q, indices=item_ids)
        y_pred = tf.reduce_sum(user_vecs * item_vecs, axis=1)

        # The gathered biases have a trailing axis of size 1; squeeze only that axis so a
        # batch holding a single interaction keeps its batch dimension.
        y_pred = tf.add(y_pred, tf.squeeze(tf.gather(self.B_user, indices=user_ids), axis=-1))
        y_pred = tf.add(y_pred, tf.squeeze(tf.gather(self.B_item, indices=item_ids), axis=-1))

        return y_pred

    @tf.function
    def compute_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        '''
        Compute the MSE loss of the model
        '''
        loss = tf.losses.mean_squared_error(y_true, y_pred)

        return loss

    @tf.function
    def train_step(self, data: tf.Tensor) -> tf.Tensor:
        '''
        Train the model with one batch
        data: batched user-item interactions
        each record in data is in the format [user_id, item_id, rating]
        Returns the batch loss.
        '''
        user_ids = tf.cast(data[:, 0], dtype=tf.int32)
        item_ids = tf.cast(data[:, 1], dtype=tf.int32)
        y_true = tf.cast(data[:, 2], dtype=tf.float32)

        # compute loss
        with tf.GradientTape() as tape:
            y_pred = self(user_ids, item_ids)
            loss = self.compute_loss(y_true, y_pred)

        # compute gradients
        gradients = tape.gradient(loss, self.trainable_variables)

        # update weights
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        return loss

    @tf.function
    def val_step(self, data: tf.Tensor) -> tf.Tensor:
        '''
        Validate the model with one batch (no weight update)
        data: batched user-item interactions
        each record in data is in the format [user_id, item_id, rating]
        Returns the batch loss.
        '''
        user_ids = tf.cast(data[:, 0], dtype=tf.int32)
        item_ids = tf.cast(data[:, 1], dtype=tf.int32)
        y_true = tf.cast(data[:, 2], dtype=tf.float32)

        # compute loss
        y_pred = self(user_ids, item_ids)
        loss = self.compute_loss(y_true, y_pred)

        return loss

    @tf.function
    def _top_k_for_user(self, user_id: tf.Tensor, k: int):
        '''
        Score every item for one user and return (values, indices) of the k best items.
        user_id must be an integer tensor; k is a Python int fixed at trace time.
        '''
        # dot product of the selected user embedding with all item embeddings
        scores = tf.reduce_sum(tf.gather(self.P, user_id) * self.Q, axis=1)
        top = tf.math.top_k(scores, k=k)
        return top.values, top.indices

    def eval_predict_onestep(self, user_identity: tf.Tensor, k: int = 5) -> tf.Tensor:
        '''
        Return the item ids of the k (default 5) highest-scored items for one user.

        user_identity: scalar user id (tf.Tensor, NumPy integer or Python int)
        Returns an int32 tf.Tensor with shape=(k,), best item first.
        '''
        # Convert to a tensor BEFORE entering the traced function: a raw Python int would be
        # treated as a constant and trigger one retrace per distinct user id.
        user_id = tf.cast(tf.convert_to_tensor(user_identity), tf.int32)
        _, top_items = self._top_k_for_user(user_id, k)

        return top_items

    def eval_predict_onestep_env(self, user_identity: tf.Tensor, k: int = 2000) -> tuple:
        '''
        Return the scores and item ids of the k (default 2000) highest-scored items for one user.

        user_identity: scalar user id (tf.Tensor, NumPy integer or Python int)
        Returns (values, indices): two tf.Tensors with shape=(k,), sorted by descending score.
        '''
        # Same tensor conversion as eval_predict_onestep to avoid per-user retracing.
        user_id = tf.cast(tf.convert_to_tensor(user_identity), tf.int32)
        y_pred_values, y_pred_indices = self._top_k_for_user(user_id, k)

        return y_pred_values, y_pred_indices

    def save_model(self, save_path):
        '''
        Save the model as a TensorFlow checkpoint inside save_path (created if missing).
        '''
        os.makedirs(save_path, exist_ok=True)
        checkpoint = tf.train.Checkpoint(model=self)
        checkpoint.save(os.path.join(save_path, 'funk_svd_recommender_weights'))

    def load_model(self, save_path):
        '''
        Restore the model from the latest checkpoint found in save_path.

        Raises FileNotFoundError when save_path holds no checkpoint, instead of silently
        leaving the randomly initialized weights in place.
        '''
        latest = tf.train.latest_checkpoint(save_path)
        if latest is None:
            raise FileNotFoundError(f'No checkpoint found in {save_path!r}')
        checkpoint = tf.train.Checkpoint(model=self)
        checkpoint.restore(latest)

### FunkSVD Training Data

In [10]:
# Per-user click counters, pre-seeded for every training user: {user_id: {item_id: click_count}}
user_item_info = {uid: {} for uid in range(N_TRAIN_USERS)}

# Each line of the click log is "<user_id>, <item_id>"
with open('./dataset/clicked_ids_output_final.txt', 'r') as click_log:
    for record in click_log:
        fields = record.strip().split(', ')
        user_id, item_id = int(fields[0]), int(fields[1])

        # Unknown user ids still raise KeyError here, as before
        clicks = user_item_info[user_id]
        clicks[item_id] = clicks.get(item_id, 0) + 1

In [12]:
# One positive record (rating 5) per distinct (user, clicked item) pair; click counts are ignored
interact_rows = [
    [user_id, item_id, 5]
    for user_id in range(N_TRAIN_USERS)
    for item_id in user_item_info[user_id]
]

df_interact = pd.DataFrame(interact_rows, columns=["user_id", "item_id", "rating"])
df_interact

Unnamed: 0,user_id,item_id,rating
0,0,43856,5
1,0,63226,5
2,0,119480,5
3,0,16929,5
4,0,187378,5
...,...,...,...
2137503,999,175254,5
2137504,999,183135,5
2137505,999,165246,5
2137506,999,171658,5


In [13]:
# Turn every item of each user's "history" list into a positive record (rating 5).
# Rows are taken positionally for both columns (the original mixed label-based and
# positional lookups), so fail fast if df_user is shorter than expected.
assert len(df_user) >= N_TEST_USERS

test_users = df_user.iloc[:N_TEST_USERS]
history_rows = [
    [user_id, item, 5]
    for user_id, history in zip(test_users["user_id"], test_users["history"])
    for item in history
]

df_exist = pd.DataFrame(history_rows, columns=["user_id", "item_id", "rating"])
df_exist

Unnamed: 0,user_id,item_id,rating
0,0,42558,5
1,0,65272,5
2,0,13353,5
3,1,146057,5
4,1,195688,5
...,...,...,...
5995,1998,60372,5
5996,1998,51442,5
5997,1999,125409,5
5998,1999,77906,5


In [14]:
# Stack the history rows (df_exist) first and the logged click rows (df_interact) after them,
# renumbering the index; the per-user split below relies on this order.
# NOTE(review): both frames share the same user_id space -- confirm the ids refer to the same users.
df_ratings = pd.concat((df_exist, df_interact), ignore_index=True)
df_ratings

Unnamed: 0,user_id,item_id,rating
0,0,42558,5
1,0,65272,5
2,0,13353,5
3,1,146057,5
4,1,195688,5
...,...,...,...
2143503,999,175254,5
2143504,999,183135,5
2143505,999,165246,5
2143506,999,171658,5


In [15]:
train_dataframes, val_dataframes = [], []

# Per user: the first 3 rows become the validation set, everything after them the training set
for uid in tqdm(range(N_TEST_USERS)):
    rows_of_user = df_ratings.loc[df_ratings['user_id'] == uid]
    held_out = rows_of_user.iloc[:3]
    remainder = rows_of_user.iloc[3:]
    assert len(held_out) == 3
    train_dataframes.append(remainder)
    val_dataframes.append(held_out)

100%|██████████| 2000/2000 [00:02<00:00, 698.37it/s]


In [16]:
# merge the per-user training frames into a single frame
df_train = pd.concat(train_dataframes, ignore_index=True)

# normalized copy of the training set: rating -> (rating - 3) / 2, so a rating of 5 becomes 1.0
df_train_norm = df_train.assign(rating=(df_train['rating'] - 3) / 2)
df_train_norm

Unnamed: 0,user_id,item_id,rating
0,0,43856,1.0
1,0,63226,1.0
2,0,119480,1.0
3,0,16929,1.0
4,0,187378,1.0
...,...,...,...
2137503,999,175254,1.0
2137504,999,183135,1.0
2137505,999,165246,1.0
2137506,999,171658,1.0


In [17]:
# Global mean of the normalized training ratings (passed to the model as bias_mu)
normalized_ratings = df_train_norm['rating']
BIAS_MU = normalized_ratings.mean()
print('bias_mu = ', BIAS_MU)

bias_mu =  1.0


In [18]:
# merge the per-user validation frames into a single frame
df_val = pd.concat(val_dataframes, ignore_index=True)

# df_val keeps the raw ratings for evaluation; df_val_norm is the normalized copy
# with rating -> (rating - 3) / 2
df_val_norm = df_val.assign(rating=(df_val['rating'] - 3) / 2)
df_val_norm

Unnamed: 0,user_id,item_id,rating
0,0,42558,1.0
1,0,65272,1.0
2,0,13353,1.0
3,1,146057,1.0
4,1,195688,1.0
...,...,...,...
5995,1998,60372,1.0
5996,1998,51442,1.0
5997,1999,125409,1.0
5998,1999,77906,1.0


In [19]:
# prepare datasets
# The training rows are grouped by user, so without shuffling every batch would contain a
# single user's interactions. Shuffle over the whole set and reshuffle at every epoch.
dataset_train = tf.data.Dataset.from_tensor_slices(df_train_norm)
dataset_train = dataset_train.shuffle(buffer_size=len(df_train_norm), reshuffle_each_iteration=True)\
                             .batch(batch_size=BATCH_SIZE, num_parallel_calls=tf.data.AUTOTUNE)\
                             .prefetch(buffer_size=tf.data.AUTOTUNE)

# The validation set is only used to average the loss, so its order does not matter.
dataset_val = tf.data.Dataset.from_tensor_slices(df_val_norm)
dataset_val = dataset_val.batch(batch_size=BATCH_SIZE, num_parallel_calls=tf.data.AUTOTUNE)\
                         .prefetch(buffer_size=tf.data.AUTOTUNE)

### Evaluation Metric

In [20]:
@tf.function
def log2(x: tf.Tensor) -> tf.Tensor:
    '''
    Base-2 logarithm of x, computed in float32 as ln(x) / ln(2)
    '''
    natural_log = tf.math.log(tf.cast(x, tf.float32))
    ln_two = tf.math.log(2.)
    return natural_log / ln_two

@tf.function
def ndcg_at_5(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    '''
    NDCG@5 for a single relevant item: 1 / log2(2 + rank) when y_true is among the first
    5 entries of y_pred (rank is its 0-based position), otherwise 0.
    '''
    top5 = tf.cast(y_pred[:5], tf.int32)
    target = tf.cast(y_true, tf.int32)
    hits = tf.cast(tf.equal(top5, target), tf.int32)
    if tf.reduce_sum(hits) > 0:
        # argmax over the 0/1 hit vector gives the position of the first hit
        return 1. / log2(2 + tf.argmax(hits))
    else:
        return tf.constant(0.)

@tf.function
def recall_at_5(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    '''
    Recall@5 for a single relevant item: 1 when y_true is among the first 5 entries
    of y_pred, otherwise 0.
    '''
    top5 = tf.cast(y_pred[:5], tf.int32)
    target = tf.cast(y_true, tf.int32)
    hit_count = tf.reduce_sum(tf.cast(tf.equal(top5, target), tf.int32))
    if hit_count > 0:
        return tf.constant(1.)
    else:
        return tf.constant(0.)

def evaluate(model: tf.keras.Model, dataset: tf.data.Dataset) -> tuple:
    '''
    Compute the mean NDCG@5 and Recall@5 of the model over an un-batched dataset.

    Each record of the dataset is [UserID, ItemID, Rating]; only records with
    Rating >= 4 (positive interactions) are scored.
    Returns (ndcg_result, recall_result).
    '''
    ndcg_scores, recall_scores = [], []

    for record in tqdm(dataset, desc='Evaluating'):
        # skip everything that is not a positive interaction
        if not (record[2] >= 4):
            continue

        # top-5 recommendation for the user, compared against the item actually interacted with
        recommended = model.eval_predict_onestep(tf.gather(record, 0))
        target_item = tf.gather(record, 1)
        ndcg_scores.append(ndcg_at_5(target_item, recommended))
        recall_scores.append(recall_at_5(target_item, recommended))

    ndcg_result = tf.reduce_mean(ndcg_scores).numpy()
    recall_result = tf.reduce_mean(recall_scores).numpy()

    return ndcg_result, recall_result

### Initialize the model

In [21]:
# Instantiate the recommender: one embedding row per test user and per item
model = FunkSVDRecommender(
    m_users=N_TEST_USERS,
    n_items=N_ITEMS,
    embedding_size=EMBEDDING_SIZE,
    learning_rate=LEARNING_RATE,
    bias_mu=BIAS_MU,
)

In [22]:
# Resume from the latest checkpoint in CKP_DIR if there is one, otherwise start from scratch.
# The training loop saves exactly one checkpoint per epoch, so the numeric suffix of the
# checkpoint name equals the number of epochs already completed.
ckp = tf.train.latest_checkpoint(CKP_DIR)
if ckp:
    completed_epochs = int(ckp.rsplit('-', 1)[-1])
    # Training continues with the epoch AFTER the last completed one. Resuming at
    # `completed_epochs` itself would run one extra epoch and label the epochs off by one.
    init_epoch = completed_epochs + 1
    ckpt = tf.train.Checkpoint(epoch=tf.Variable(completed_epochs), model=model)
    ckpt.restore(ckp)
    print(f'Resume training from epoch {init_epoch}')
else:
    init_epoch = 1
    ckpt = tf.train.Checkpoint(epoch=tf.Variable(0), model=model)
    print(f'Start training from epoch {init_epoch}')

# Keep only the 3 most recent checkpoints on disk
manager = tf.train.CheckpointManager(ckpt, CKP_DIR, max_to_keep=3, checkpoint_name='funkSVD_cf')

Start training from epoch 1


In [23]:
# Offline training: one pass over the training and validation sets per epoch
train_losses, val_losses = [], []

for epoch in range(init_epoch, INITIAL_N_EPOCHS + 1):
    ckpt.epoch.assign_add(1)

    # training pass (updates the weights), then validation pass (loss only)
    train_loss = [model.train_step(batch).numpy() for batch in dataset_train]
    val_loss = [model.val_step(batch).numpy() for batch in dataset_val]

    # per-epoch averages of the batch losses
    avg_train_loss = np.mean(train_loss)
    avg_val_loss = np.mean(val_loss)
    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)

    print(f'Epoch {epoch} train_loss: {avg_train_loss:.4f}, val_loss: {avg_val_loss:.4f}')

    # persist the weights after every epoch
    save_path = manager.save()
    if save_path:
        print("Saved checkpoint for epoch {}: {}".format(int(ckpt.epoch), save_path))

Epoch 1 train_loss: 0.9620, val_loss: 0.9300
Saved checkpoint for epoch 1: ./checkpoints/FunkSVD_CF_ver4/funkSVD_cf-1
Epoch 2 train_loss: 0.7581, val_loss: 0.8343
Saved checkpoint for epoch 2: ./checkpoints/FunkSVD_CF_ver4/funkSVD_cf-2
Epoch 3 train_loss: 0.4916, val_loss: 0.7191
Saved checkpoint for epoch 3: ./checkpoints/FunkSVD_CF_ver4/funkSVD_cf-3
Epoch 4 train_loss: 0.2685, val_loss: 0.6043
Saved checkpoint for epoch 4: ./checkpoints/FunkSVD_CF_ver4/funkSVD_cf-4
Epoch 5 train_loss: 0.1330, val_loss: 0.5098
Saved checkpoint for epoch 5: ./checkpoints/FunkSVD_CF_ver4/funkSVD_cf-5
Epoch 6 train_loss: 0.0611, val_loss: 0.4451
Saved checkpoint for epoch 6: ./checkpoints/FunkSVD_CF_ver4/funkSVD_cf-6
Epoch 7 train_loss: 0.0269, val_loss: 0.4079
Saved checkpoint for epoch 7: ./checkpoints/FunkSVD_CF_ver4/funkSVD_cf-7
Epoch 8 train_loss: 0.0119, val_loss: 0.3905
Saved checkpoint for epoch 8: ./checkpoints/FunkSVD_CF_ver4/funkSVD_cf-8
Epoch 9 train_loss: 0.0054, val_loss: 0.3830
Saved check

KeyboardInterrupt: 

In [None]:
# Draw the per-epoch training and validation losses on a single axes
fig, ax = plt.subplots()
ax.plot(train_losses, label='train_loss')
ax.plot(val_losses, label='val_loss')
ax.legend(loc='upper right')
ax.set_title('Loss curve')
plt.show()

### Evaluate the model with the training set and validation set

In [None]:
# Evaluate on the un-normalized, un-batched training records.
# A separate name is used on purpose: reassigning `dataset_train` here would replace the
# batched training pipeline, and re-running the training cell afterwards would fail.
dataset_train_eval = tf.data.Dataset.from_tensor_slices(df_train)
dataset_train_eval = dataset_train_eval.prefetch(buffer_size=tf.data.AUTOTUNE)
ndcg_result, recall_result = evaluate(model, dataset_train_eval)
print(f'Evaluation result: [NDCG@5: {ndcg_result:.6f}, Recall@5: {recall_result:.6f}]')

In [None]:
# Evaluate on the un-normalized, un-batched validation records
dataset_eval = tf.data.Dataset.from_tensor_slices(df_val).prefetch(buffer_size=tf.data.AUTOTUNE)
ndcg_result, recall_result = evaluate(model, dataset_eval)
print(f'Evaluation result: [NDCG@5: {ndcg_result:.6f}, Recall@5: {recall_result:.6f}]')

## Simulation Environments

## Training

In [None]:
# # Initialize the training environment
# train_env = TrainingEnvironment()
# train_scores = []


# interact_data = []
# # Repeat the testing process for 5 times
# for i in range(TRAINING_EPISODES):
#     print(f'Episode {i+1}:')

#     # Start the training process
#     # Run as long as there exist some active users
#     while train_env.has_next_state():
#         # Get the current user id
#         cur_user = train_env.get_state()
#         # print(f'The current user is user {cur_user}.')

#         # [TODO] Employ your recommendation policy to generate a slate of 5 distinct items
#         # slate = []
#         # for i in range(5):
#         #     user_recommendation_sim_based[cur_user].get(i)
        
#         slate = model.eval_predict_onestep(cur_user)

#         # Get the response of the slate from the environment
#         clicked_id, in_environment = train_env.get_response(slate)
#         # print(f'The click result of recommending {slate} to user {cur_user} is {f"item {clicked_id}" if clicked_id != -1 else f"{clicked_id} (no click)"}.')
#         # print(f'User {cur_user} {"is still in" if in_environment else "leaves"} the environment.')

#         if clicked_id == -1:
#             for item in slate:
#                 random_number = random.choice([1, 2, 3])
#                 interact_data.append([cur_user, item.numpy(), random_number])
#         else:
#             interact_data.append([cur_user, clicked_id, 5])

#         # [TODO] Update your model here (optional)
#         ACCUMULATIVE_DATA_SIZE = 100*BATCH_SIZE
#         if len(interact_data) >= ACCUMULATIVE_DATA_SIZE:
#             print(f'Updating Model')
#             df_interact = pd.DataFrame(interact_data[:ACCUMULATIVE_DATA_SIZE], columns=["user_id", "item_id", "rating"])
#             df_interact.to_csv('./interact_data/interact_data_based_on_sim.csv', mode='a', header=False, index=False)
#             interact_data = interact_data[ACCUMULATIVE_DATA_SIZE:]

#             df_interact = pd.concat([df_train, df_interact], axis=0)
#             df_interact_norm = df_interact.copy(deep=True)
#             df_interact_norm['rating'] -= 3
#             df_interact_norm['rating'] /= 2

#             dataset_interact = tf.data.Dataset.from_tensor_slices(df_interact_norm)
#             dataset_interact = dataset_interact.batch(batch_size=BATCH_SIZE, num_parallel_calls=tf.data.AUTOTUNE)\
#                                                 .prefetch(buffer_size=tf.data.AUTOTUNE)
            

#             # train the model
#             train_losses = []
#             val_losses = []

#             for epoch in range(1, N_EPOCHS + 1):
#                 train_loss = []
#                 val_loss = []
#                 # print(f'Epoch {epoch}:')

#                 # training
#                 for data in dataset_interact:
#                     loss = model.train_step(data)
#                     train_loss.append(loss.numpy())

#                 # validating
#                 for data in dataset_val:
#                     loss = model.val_step(data)
#                     val_loss.append(loss.numpy())

#                 # record losses
#                 avg_train_loss = np.mean(train_loss)
#                 avg_val_loss = np.mean(val_loss)
#                 train_losses.append(avg_train_loss)
#                 val_losses.append(avg_val_loss)

#                 # print losses
#                 print(f'Epoch {epoch} train_loss: {avg_train_loss:.4f}, val_loss: {avg_val_loss:.4f}\r', end="")

#             print(f'')


#     # Record the score of this testing episode
#     train_scores.append(train_env.get_score())

#     # Reset the training environment (this can be useful when you have finished one episode of simulation and do not want to re-initialize a new environment)
#     train_env.reset()
#     print(f'Have collected {len(interact_data)} interact data\n')

#     # [TODO] Delete or reset your model weights here (in the end of each testing episode)
#     # [TODO] Code for deleting your model weights...

In [None]:
# # Calculate the average scores 
# avg_scores = [np.average(train_scores) for train_scores in zip(*train_scores)]
# df_train_score = pd.DataFrame([[user_id, avg_score] for user_id, avg_score in enumerate(avg_scores)], columns=['user_id', 'avg_score'])
# df_train_score

In [None]:
# print(f'training score: {1-(df_train_score["avg_score"].sum()/2000)}')

In [None]:
# # plot the training curve
# plt.plot(train_losses, label='train_loss')
# plt.plot(val_losses, label='val_loss')
# plt.legend(loc='upper right')
# plt.title('Loss curve')
# plt.show()

### Evaluate the model with the training set and validation set

In [None]:
# dataset_train = tf.data.Dataset.from_tensor_slices(df_interact)
# variable = dataset_train.prefetch(buffer_size=tf.data.AUTOTUNE)
# ndcg_result, recall_result = evaluate(model, dataset_train)
# print(f'Evaluation result: [NDCG@5: {ndcg_result:.6f}, Recall@5: {recall_result:.6f}]')

In [None]:
# dataset_val = tf.data.Dataset.from_tensor_slices(df_val)
# dataset_val = dataset_val.prefetch(buffer_size=tf.data.AUTOTUNE)
# ndcg_result, recall_result = evaluate(model, dataset_val)
# print(f'Evaluation result: [NDCG@5: {ndcg_result:.6f}, Recall@5: {recall_result:.6f}]')

### Save FunkSVD

In [None]:
# # save the model
# model.save_model(MODEL_PATH)

## Testing

In [None]:
# Reload the most recent checkpoint from CKP_DIR into `model` (nothing happens if none exists)
ckp = tf.train.latest_checkpoint(CKP_DIR)
if ckp:
    # the checkpoint name ends with "-<number>"
    init_epoch = int(ckp.rsplit('-', 1)[-1])
    ckpt = tf.train.Checkpoint(model=model, epoch=tf.Variable(init_epoch))
    ckpt.restore(ckp)
    print(f'Restore checkpoint from latest epoch {init_epoch}')

In [None]:
# For every test user, turn the scores of the top-2000 items into sampling probabilities.
# 2000 is the number of items returned by model.eval_predict_onestep_env.
prob = np.empty((N_TEST_USERS, 2000))

for user_id in tqdm(range(N_TEST_USERS)):
    # Pass a tensor, not a Python int: tf.function treats Python scalars as constants and
    # would build a new graph for every distinct user id.
    sorted_y_pred, sorted_indices = model.eval_predict_onestep_env(tf.constant(user_id))

    # Normalize in float64 with vectorized NumPy sums (the builtin sum() iterates element by
    # element and accumulates in float32).
    # NOTE(review): this assumes the top scores are positive; negative scores would yield
    # negative "probabilities".
    normalized_weights = sorted_y_pred.numpy().astype(np.float64)
    normalized_weights /= normalized_weights.sum()
    # absorb the rounding error in the last entry so the row sums to 1
    normalized_weights[-1] = 1.0 - normalized_weights[:-1].sum()
    prob[user_id] = normalized_weights

In [None]:
# Initialize the testing environment
test_env = TestingEnvironment()
scores = []

# Candidate pool for the random baseline recommender (kept for reference; the FunkSVD
# policy below does not use it)
item_ids = list(range(N_ITEMS))

# Repeat the testing process TEST_EPISODES times
for _ in range(TEST_EPISODES):
    # Load the model weights at the beginning of each testing episode
    ckp = tf.train.latest_checkpoint(CKP_DIR)
    if ckp:
        init_epoch = (int(ckp.split('-')[-1]))
        ckpt = tf.train.Checkpoint(epoch=tf.Variable(init_epoch), model=model)
        ckpt.restore(ckp)
        print(f'Restore checkpoint from latest epoch {init_epoch}')

    # The model is not updated during an episode, so each user's candidate items and sampling
    # probabilities are computed once and reused at every later step of that user.
    # Clear this cache whenever the model weights change.
    candidate_cache = {}

    # Start the testing process
    with tqdm(desc='Testing') as pbar:
        # Run as long as there exist some active users
        while test_env.has_next_state():
            # Get the current user id
            cur_user = test_env.get_state()

            if cur_user not in candidate_cache:
                # Pass a tensor so that tf.function does not retrace for every user id
                sorted_y_pred, sorted_indices = model.eval_predict_onestep_env(tf.constant(cur_user))

                # Normalize the top scores into probabilities (float64, vectorized sums)
                # NOTE(review): np.random.choice rejects negative probabilities; this assumes
                # the top scores are positive.
                normalized_weights = sorted_y_pred.numpy().astype(np.float64)
                normalized_weights /= normalized_weights.sum()
                # absorb the rounding error in the last entry so the weights sum to 1
                normalized_weights[-1] = 1.0 - normalized_weights[:-1].sum()

                candidate_cache[cur_user] = (sorted_indices.numpy(), normalized_weights)

            candidate_items, candidate_probs = candidate_cache[cur_user]

            # Alternative policy (disabled): strict selection -- walk candidate_items in score
            # order and skip items the user already clicked; this needs a per-user record of
            # clicked items, updated after every get_response call.

            # Sample SLATE_SIZE distinct items as the slate, weighted by the predicted scores
            slate = np.random.choice(candidate_items, size=SLATE_SIZE, replace=False, p=candidate_probs)

            # Get the response of the slate from the environment
            clicked_id, in_environment = test_env.get_response(slate)

            # [TODO] Update your model here (optional)
            # [TODO] You can update your model at each step, or perform a batched update after some interval

            # Update the progress indicator
            pbar.update(1)

    # Record the score of this testing episode
    scores.append(test_env.get_score())

    # Reset the testing environment
    test_env.reset()

    # [TODO] Delete or reset your model weights here (in the end of each testing episode)

# Calculate the average scores
avg_scores = [np.average(score) for score in zip(*scores)]

# Generate a DataFrame to output the result in a .csv file
df_result = pd.DataFrame([[user_id, avg_score] for user_id, avg_score in enumerate(avg_scores)], columns=['user_id', 'avg_score'])

# Make sure the output directory exists so the results of a long run are not lost
output_dir = os.path.dirname(OUTPUT_PATH)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df_result.to_csv(OUTPUT_PATH, index=False)
df_result

In [None]:
# Report 1 - (sum of the per-user average scores / number of test users)
print(f'testing score: {1-(df_result["avg_score"].sum()/N_TEST_USERS)}')