In [1]:
import tensorflow as tf
from tensorflow.keras import Model
from dataset import DataSet
import numpy as np
from tensorflow.keras.initializers import TruncatedNormal
from tqdm import tqdm
from time import time
import pandas as pd
import numpy as np
import pickle
import pandas as pd
from tqdm import tqdm

In [2]:
# base_folder = 'F:\\Projects\\train\\episerver\\data\\rs\\'
# base_folder = 'E:\\Projects\\Train\\episerver\\data\\rs\\'
model_folder = 'E:\\Projects\\Train\\episerver\\model\\rs\\'

In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus.__len__() > 0:
    tf.config.experimental.set_visible_devices(gpus[1], 'GPU')

In [4]:
class DataSet:

    def __init__(self, ratings, batch_size=128):
        self.ratings = ratings
        self.batch_size = batch_size
        self.num_batch = self.ratings.shape[0] // self.batch_size

    def shuffle(self):
        np.random.shuffle(self.ratings)

    def get_batch(self, i):
        user_ids = self.ratings[i * self.batch_size: (i + 1) * self.batch_size, 0]
        item_ids = self.ratings[i * self.batch_size: (i + 1) * self.batch_size, 1]
        rates = self.ratings[i * self.batch_size: (i + 1) * self.batch_size, 2]
        return (np.array(user_ids, dtype=np.int32),
                np.array(item_ids, dtype=np.int32),
                np.array(rates, dtype=np.float32))

In [5]:
class RSModel(Model):

    def __init__(self, args):
        super(RSModel, self).__init__()
        self.embedding_size = args['embedding_size']
        self.keyword_embedding_size = args['keyword_embedding_size']
        self.alpha = args['alpha']
        self.beta = args['beta']
        self.gamma = args['gamma']
        self.num_items = args['num_items']
        self.num_users = args['num_users']
        self.num_keywords = args['num_keywords']
        self.item_keywords = tf.constant(args['item_keywords'], dtype=tf.int32)
        self.keyword_embedding = tf.keras.layers.Embedding(input_dim=self.num_keywords + 1, output_dim=self.keyword_embedding_size,
                                                           embeddings_initializer=TruncatedNormal(mean=0., stddev=0.1),
                                                           mask_zero=True,
                                                           embeddings_regularizer=tf.keras.regularizers.L2(self.alpha)
                                                           )
        self.user_embedding = tf.keras.layers.Embedding(input_dim=self.num_users + 1, output_dim=self.embedding_size,
                                                        embeddings_initializer=TruncatedNormal(mean=0., stddev=0.1),
                                                        embeddings_regularizer=tf.keras.regularizers.L2(self.beta))
        self.item_embedding = tf.keras.layers.Embedding(input_dim=self.num_items, output_dim=self.embedding_size,
                                                        embeddings_initializer=TruncatedNormal(mean=0., stddev=0.1),
                                                        embeddings_regularizer=tf.keras.regularizers.L2(self.beta))
        self.bias_u = tf.keras.layers.Embedding(input_dim=self.num_users + 1, output_dim=1,
                                                embeddings_initializer=TruncatedNormal(mean=0., stddev=0.1),
                                                embeddings_regularizer=tf.keras.regularizers.L2(self.gamma))
        self.bias_i = tf.keras.layers.Embedding(input_dim=self.num_items, output_dim=1,
                                                embeddings_initializer=TruncatedNormal(mean=0., stddev=0.1),
                                                embeddings_regularizer=tf.keras.regularizers.L2(self.gamma))
        self.mlp_dense = tf.keras.layers.Dense(units=1)

    def call(self, user_ids, item_ids):
        user_bias = self.bias_u(user_ids)
        item_bias = self.bias_i(item_ids)
        # matrix factorization
        users_embedding = self.user_embedding(user_ids)
        items_embedding = self.item_embedding(item_ids)
        mf = tf.math.multiply(users_embedding, items_embedding)
        # mlp
        item_keyword = tf.nn.embedding_lookup(self.item_keywords, item_ids)
        item_keyword_embedding = self.keyword_embedding(item_keyword)
        item_encode = tf.reduce_sum(item_keyword_embedding, axis=1)
        item_encode = self.mlp_dense(item_encode)
        # rating score
        r = tf.squeeze(user_bias) + tf.squeeze(item_bias) + tf.reduce_sum(mf, axis=1) + tf.reduce_sum(item_encode, axis=1)
        
#         r = tf.squeeze(user_bias) + tf.squeeze(item_bias) + tf.reduce_sum(mf, axis=1)
        return r

    def loss_fn_rmse(self, predictions, labels):
        loss = tf.reduce_sum(tf.math.square(predictions - labels))
        loss += tf.reduce_sum(self.keyword_embedding.losses)
        loss += tf.reduce_sum(rsmodel.user_embedding.losses) + tf.reduce_sum(rsmodel.item_embedding.losses)
#         loss += tf.reduce_sum(self.bias_u.losses) + tf.reduce_sum(self.bias_i.losses)
        return loss


@tf.function
def train_step(rs_model, optimizer, user_ids, item_ids, ratings):
    with tf.GradientTape() as tape:
        predictions = rs_model(user_ids, item_ids)
        loss = rs_model.loss_fn_rmse(predictions, ratings)
    gradients = tape.gradient(target=loss, sources=rs_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, rs_model.trainable_variables))
    return loss


def get_val_rmse(rs_model, val_dataset):
    all_ratings = []
    all_predictions = []
    for i in tqdm(range(val_dataset.num_batch)):
        user_ids, item_ids, ratings = val_dataset.get_batch(i)
        predictions = rsmodel(user_ids, item_ids)
        all_predictions.append(predictions.numpy())
        all_ratings.append(ratings)
    val_predictions = np.concatenate(all_predictions, axis=0)
    val_ratings = np.concatenate(all_ratings, axis=0)    
    return np.sqrt(np.mean((val_predictions - val_ratings) ** 2))


def training(rs_model, optimizer, train_dataset, val_dataset, num_epochs, pretrained=False):
    epoch_step = tf.Variable(0, dtype=tf.int32)
    ckpt = tf.train.Checkpoint(fism_model=rs_model, epoch_step=epoch_step)
    manager = tf.train.CheckpointManager(checkpoint=ckpt, directory='./rsmodel_ckpt', max_to_keep=3)
    if pretrained:
        ckpt.restore(manager.latest_checkpoint)
    for epoch in range(num_epochs):
        train_loss = tf.constant(0, tf.float32)
        start_load_data = time()
        train_dataset.shuffle()
        load_data_time = time() - start_load_data
        start_train_time = time()
        for i in tqdm(range(train_dataset.num_batch)):
            user_ids, item_ids, ratings = train_dataset.get_batch(i)
            loss_step = train_step(rs_model, optimizer, user_ids, item_ids, ratings)
            train_loss += loss_step
            if i > 1000:
                break
        train_time = time() - start_train_time
        print('epoch: ', epoch, '. load data time: ', load_data_time, '. train time: ', train_time, '. train loss: ', train_loss.numpy())
        if epoch % 2 == 0:
            val_rmse = get_val_rmse(rs_model, val_dataset)
            score = {'val_rmse': val_rmse}

            print('epoch: {}, score: {}'.format(epoch, score))
            ckpt.epoch_step.assign_add(epoch + 1)
            manager.save()
            print('done save at epoch: ', ckpt.epoch_step.numpy())

#### load data

In [6]:
train = pickle.load(open(model_folder + 'train.pkl', 'rb'))
val = pickle.load(open(model_folder + 'val.pkl', 'rb'))
test = pickle.load(open(model_folder + 'test.pkl', 'rb'))

movie_id_idx_map = pickle.load(open(model_folder + 'movie_id_idx_map.pkl', 'rb'))
idx_movie_id_map = pickle.load(open(model_folder + 'idx_movie_id_map.pkl', 'rb'))
meta_data = pickle.load(open(model_folder + 'meta_data.pkl', 'rb'))

item_keywords = pickle.load(open(model_folder + 'item_keywords.pkl', 'rb'))

In [7]:
train_dataset = DataSet(train[['userId', 'itemId', 'rating']].values, batch_size=1024)
val_dataset = DataSet(val[['userId', 'itemId', 'rating']].values, batch_size=1024)
test_dataset = DataSet(test[['userId', 'itemId', 'rating']].values, batch_size=1024)

In [8]:
args = dict()
args['embedding_size'] = 128
args['keyword_embedding_size'] = 128
args['alpha'] = 0.005
args['beta'] = 0.005
args['gamma'] = 0.000
args['num_items'] = meta_data['num_items']
args['num_users'] = meta_data['num_users']
args['num_keywords'] = meta_data['num_keywords']
args['item_keywords'] = item_keywords


rsmodel = RSModel(args)
opt = tf.keras.optimizers.Adam(learning_rate=0.005)

In [9]:
meta_data['num_users']

270896

In [10]:
training(rsmodel, opt, train_dataset, val_dataset, num_epochs=10)

  4%|███▎                                                                       | 1001/22872 [05:08<1:52:11,  3.25it/s]
  3%|██▍                                                                            | 39/1270 [00:00<00:06, 187.25it/s]

epoch:  0 . load data time:  33.879422187805176 . train time:  308.08832812309265 . train loss:  1249405.5


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:06<00:00, 192.12it/s]


epoch: 0, score: {'val_rmse': 0.9370996}
done save at epoch:  1


  4%|███▎                                                                       | 1001/22872 [04:57<1:48:16,  3.37it/s]


epoch:  1 . load data time:  32.93997597694397 . train time:  297.32011556625366 . train loss:  1212435.2


  4%|███▎                                                                       | 1001/22872 [04:51<1:46:11,  3.43it/s]
  3%|██▌                                                                            | 41/1270 [00:00<00:06, 196.16it/s]

epoch:  2 . load data time:  32.84723973274231 . train time:  291.6258432865143 . train loss:  1194330.1


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:06<00:00, 190.46it/s]


epoch: 2, score: {'val_rmse': 0.8969592}
done save at epoch:  4


  4%|███▎                                                                       | 1001/22872 [04:54<1:47:25,  3.39it/s]


epoch:  3 . load data time:  34.59657096862793 . train time:  294.9803729057312 . train loss:  1165252.0


  4%|███▎                                                                       | 1001/22872 [04:49<1:45:25,  3.46it/s]
  1%|█▏                                                                             | 19/1270 [00:00<00:06, 188.62it/s]

epoch:  4 . load data time:  33.262073278427124 . train time:  289.48606157302856 . train loss:  1146861.1


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:07<00:00, 176.35it/s]


epoch: 4, score: {'val_rmse': 0.88719815}
done save at epoch:  9


  4%|███▎                                                                       | 1001/22872 [05:24<1:58:05,  3.09it/s]


epoch:  5 . load data time:  34.816917419433594 . train time:  324.2721002101898 . train loss:  1135320.5


  4%|███▎                                                                       | 1001/22872 [05:04<1:50:55,  3.29it/s]
  1%|█                                                                              | 18/1270 [00:00<00:07, 175.23it/s]

epoch:  6 . load data time:  34.01309633255005 . train time:  304.6165795326233 . train loss:  1131118.5


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:07<00:00, 174.77it/s]


epoch: 6, score: {'val_rmse': 0.88753444}
done save at epoch:  16


  4%|███▎                                                                       | 1001/22872 [04:59<1:48:55,  3.35it/s]


epoch:  7 . load data time:  34.43848276138306 . train time:  299.11631441116333 . train loss:  1127370.4


  4%|███▎                                                                       | 1001/22872 [05:00<1:49:27,  3.33it/s]
  1%|▊                                                                              | 14/1270 [00:00<00:09, 136.29it/s]

epoch:  8 . load data time:  33.23218393325806 . train time:  300.5798923969269 . train loss:  1120706.6


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:07<00:00, 173.98it/s]


epoch: 8, score: {'val_rmse': 0.88174975}
done save at epoch:  25


  4%|███▎                                                                       | 1001/22872 [05:02<1:50:00,  3.31it/s]

epoch:  9 . load data time:  33.84554696083069 . train time:  302.1072859764099 . train loss:  1117058.0





In [10]:
training(rsmodel, opt, train_dataset, val_dataset, num_epochs=10)

  4%|███▎                                                                         | 1001/22872 [02:29<54:35,  6.68it/s]
  3%|██▍                                                                            | 40/1270 [00:00<00:03, 393.20it/s]

epoch:  0 . load data time:  34.365124464035034 . train time:  149.92917370796204 . train loss:  1120230.2


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:03<00:00, 401.08it/s]


epoch: 0, score: {'val_rmse': 0.93591845}
done save at epoch:  1


  4%|███▎                                                                         | 1001/22872 [02:28<54:10,  6.73it/s]


epoch:  1 . load data time:  34.087865352630615 . train time:  148.75131464004517 . train loss:  1044392.8


  4%|███▎                                                                         | 1001/22872 [02:27<53:41,  6.79it/s]
  3%|██▍                                                                            | 40/1270 [00:00<00:03, 389.39it/s]

epoch:  2 . load data time:  32.71967267990112 . train time:  147.4328145980835 . train loss:  1021546.25


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:03<00:00, 409.72it/s]


epoch: 2, score: {'val_rmse': 0.89548504}
done save at epoch:  4


  4%|███▎                                                                         | 1001/22872 [02:25<53:03,  6.87it/s]


epoch:  3 . load data time:  33.15340971946716 . train time:  145.6895010471344 . train loss:  1000892.44


  4%|███▎                                                                         | 1001/22872 [02:24<52:37,  6.93it/s]
  3%|██▍                                                                            | 39/1270 [00:00<00:03, 383.38it/s]

epoch:  4 . load data time:  32.7544584274292 . train time:  144.4976589679718 . train loss:  988913.4


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:03<00:00, 410.61it/s]


epoch: 4, score: {'val_rmse': 0.8833952}
done save at epoch:  9


  4%|███▎                                                                         | 1001/22872 [02:32<55:34,  6.56it/s]


epoch:  5 . load data time:  32.53402066230774 . train time:  152.60102224349976 . train loss:  984102.9


  4%|███▎                                                                         | 1001/22872 [02:33<55:44,  6.54it/s]
  6%|████▊                                                                          | 77/1270 [00:00<00:03, 375.77it/s]

epoch:  6 . load data time:  34.2314817905426 . train time:  153.05780148506165 . train loss:  977282.94


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:03<00:00, 386.46it/s]


epoch: 6, score: {'val_rmse': 0.87926906}
done save at epoch:  16


  4%|███▎                                                                         | 1001/22872 [02:27<53:39,  6.79it/s]


epoch:  7 . load data time:  34.48778676986694 . train time:  147.34806656837463 . train loss:  973930.1


  4%|███▎                                                                         | 1001/22872 [02:26<53:26,  6.82it/s]
  6%|████▌                                                                          | 74/1270 [00:00<00:03, 354.98it/s]

epoch:  8 . load data time:  32.73548197746277 . train time:  146.7486686706543 . train loss:  972354.3


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:03<00:00, 387.29it/s]


epoch: 8, score: {'val_rmse': 0.8769898}
done save at epoch:  25


  4%|███▎                                                                         | 1001/22872 [02:29<54:18,  6.71it/s]

epoch:  9 . load data time:  32.93003034591675 . train time:  149.15323948860168 . train loss:  965386.2





In [None]:
training(rsmodel, opt, train_dataset, val_dataset, num_epochs=10)

  4%|███▎                                                                         | 1001/22872 [02:25<53:05,  6.86it/s]
  8%|██████▏                                                                       | 100/1270 [00:00<00:01, 983.01it/s]

epoch:  0 . load data time:  34.43304133415222 . train time:  145.82096195220947 . train loss:  6085918.0


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:01<00:00, 985.62it/s]


epoch: 0, score: {'val_rmse': 1.3326454}
done save at epoch:  1


  4%|███▎                                                                         | 1001/22872 [02:24<52:37,  6.93it/s]


epoch:  1 . load data time:  33.41731905937195 . train time:  144.53356170654297 . train loss:  2219069.8


  4%|███▎                                                                         | 1001/22872 [02:25<52:59,  6.88it/s]
  7%|█████▊                                                                         | 93/1270 [00:00<00:01, 923.24it/s]

epoch:  2 . load data time:  33.36878800392151 . train time:  145.50499296188354 . train loss:  1835103.2


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:01<00:00, 959.74it/s]


epoch: 2, score: {'val_rmse': 1.0594991}
done save at epoch:  4


  4%|███▎                                                                         | 1001/22872 [02:28<54:09,  6.73it/s]


epoch:  3 . load data time:  33.23911452293396 . train time:  148.71837329864502 . train loss:  1558471.5


  4%|███▎                                                                         | 1001/22872 [02:24<52:41,  6.92it/s]
 15%|███████████▉                                                                  | 195/1270 [00:00<00:01, 963.31it/s]

epoch:  4 . load data time:  32.914036989212036 . train time:  144.70812463760376 . train loss:  1347344.5


100%|█████████████████████████████████████████████████████████████████████████████| 1270/1270 [00:01<00:00, 973.85it/s]


epoch: 4, score: {'val_rmse': 0.9718058}
done save at epoch:  9
