<a href="https://colab.research.google.com/github/lengochai97/thesis/blob/master/notebooks/models/Offline_Part.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# First things first

In [0]:
!pip install -q tensorflow==1.13.1

In [0]:
import google.colab.drive

google.colab.drive.mount('/content/gdrive')

In [0]:
import tensorflow as tf

# tf.enable_eager_execution()

In [0]:
import glob
import os

import numpy as np
import pandas as pd
import tensorflow as tf
import tqdm

# Load dataset

In [0]:
DATA_PATH = '/content/gdrive/My Drive/dataset/adressa/one_week'

In [0]:
# Schema passed to tf.io.parse_single_example by the parse_* functions below:
# maps each stored feature name to its fixed shape and dtype.
features = {
    # Scalar fields.
    'eventId': tf.FixedLenFeature([], tf.int64),
    'clickLabel': tf.FixedLenFeature([], tf.int64),
    'userActiveness': tf.FixedLenFeature([], tf.float32),
    # News-side vectors.
    'categoryVector': tf.FixedLenFeature([30], tf.float32),
    'newsClickCountVector': tf.FixedLenFeature([4], tf.float32),
    # Request context vector.
    'contextVector': tf.FixedLenFeature([32], tf.float32),
    # User-side vectors.
    'userHistoryVector': tf.FixedLenFeature([30], tf.float32),
    'userProfileVector': tf.FixedLenFeature([120], tf.float32),
    'userClickCountVector': tf.FixedLenFeature([4], tf.float32),
    # "Next" counterparts of the user-side vectors, same shapes as above.
    # NOTE(review): presumably the user state after this event -- confirm
    # against the code that wrote the TFRecords.
    'userHistoryVectorNext': tf.FixedLenFeature([30], tf.float32),
    'userProfileVectorNext': tf.FixedLenFeature([120], tf.float32),
    'userClickCountVectorNext': tf.FixedLenFeature([4], tf.float32),
}


def _append_log_counts(vector, click_counts):
  """Concatenates `vector` with log(click_counts + 1) along axis 0."""
  return tf.concat([vector, tf.math.log(click_counts + 1.)], 0)


def _elementwise_product(left, right):
  """Multiplies two same-shaped vectors element by element."""
  return tf.math.reduce_prod([left, right], axis=0)


def _model_inputs(e):
  """Builds the four named model inputs from a parsed example dict `e`.

  Shared by every parse_* function so the feature construction is defined in
  exactly one place.
  """
  return {
      'news_features': _append_log_counts(e['categoryVector'], e['newsClickCountVector']),

      'user_features': _append_log_counts(e['userProfileVector'], e['userClickCountVector']),

      'user_news_features': _elementwise_product(e['categoryVector'], e['userHistoryVector']),

      'context_features': e['contextVector'],
  }


def parse_example(serialized):
  """Parses one serialized example into a flat dict of named tensors.

  Besides the four model inputs, the dict carries the event id, the click
  label, the user activeness and the "next" variants of the user-dependent
  features (built from the *Next vectors of the schema).

  Args:
    serialized: scalar string tensor holding one serialized example.

  Returns:
    Dict mapping snake_case feature names to tensors.
  """
  e = tf.io.parse_single_example(serialized, features)
  current = _model_inputs(e)

  return {
      'event_id': e['eventId'],

      'click_label': e['clickLabel'],

      'user_activeness': e['userActiveness'],

      'news_features': current['news_features'],

      'user_features': current['user_features'],

      'user_features_next': _append_log_counts(e['userProfileVectorNext'], e['userClickCountVectorNext']),

      'user_news_features': current['user_news_features'],

      'user_news_features_next': _elementwise_product(e['categoryVector'], e['userHistoryVectorNext']),

      'context_features': current['context_features'],
  }


def parse_inputs_targets(serialized):
  """Parses one serialized example into an `(inputs, targets)` pair.

  Args:
    serialized: scalar string tensor holding one serialized example.

  Returns:
    Tuple `(inputs, targets)`: `inputs` is the dict of the four model inputs
    and `targets` is the int64 click label.
  """
  e = tf.io.parse_single_example(serialized, features)

  return _model_inputs(e), e['clickLabel']


def parse_inputs_targets_user_activeness(serialized, user_activeness_coef=.05):
  """Like `parse_inputs_targets`, with user activeness added to the target.

  Args:
    serialized: scalar string tensor holding one serialized example.
    user_activeness_coef: weight of the user-activeness term in the target.
      Defaults to the previously hard-coded 0.05.

  Returns:
    Tuple `(inputs, targets)` where
    `targets = float(clickLabel) + user_activeness_coef * userActiveness`.
  """
  e = tf.io.parse_single_example(serialized, features)

  coef = tf.constant(user_activeness_coef, tf.float32)

  targets = tf.dtypes.cast(e['clickLabel'], tf.float32) + coef * e['userActiveness']

  return _model_inputs(e), targets

In [0]:
def build_dataset_train(filepaths, batch_size, epochs):
  """Creates the training pipeline that yields `(inputs, click_label)` batches.

  Args:
    filepaths: GZIP-compressed TFRecord file path(s) to read.
    batch_size: number of examples per batch.
    epochs: how many times the batched data is repeated.

  Returns:
    A tf.data dataset: parse -> batch -> repeat -> prefetch(1).
  """
  records = tf.data.TFRecordDataset(filepaths, 'GZIP')
  examples = records.map(parse_inputs_targets)
  batches = examples.batch(batch_size)
  repeated = batches.repeat(epochs)

  return repeated.prefetch(1)

def build_dataset_train_user_activeness(filepaths, batch_size, epochs):
  """Creates the training pipeline whose targets include user activeness.

  Same stages as `build_dataset_train`, except that each record is parsed
  with `parse_inputs_targets_user_activeness`.

  Args:
    filepaths: GZIP-compressed TFRecord file path(s) to read.
    batch_size: number of examples per batch.
    epochs: how many times the batched data is repeated.

  Returns:
    A tf.data dataset: parse -> batch -> repeat -> prefetch(1).
  """
  records = tf.data.TFRecordDataset(filepaths, 'GZIP')
  parsed = records.map(parse_inputs_targets_user_activeness)
  pipeline = parsed.batch(batch_size).repeat(epochs)

  return pipeline.prefetch(1)

In [0]:
# def flatten_batch(batch):
#   return {
#       'event_id': tf.reshape(batch['event_id'], [-1]),
#       'click_label': tf.reshape(batch['click_label'], [-1]),
#       'user_activeness': tf.reshape(batch['user_activeness'], [-1]),
#       'news_features': tf.reshape(batch['news_features'], [-1, batch['news_features'].shape[2]]),
#       'user_features': tf.reshape(batch['user_features'], [-1, batch['user_features'].shape[2]]),
#       'user_features_next': tf.reshape(batch['user_features_next'], [-1, batch['user_features_next'].shape[2]]),
#       'user_news_features': tf.reshape(batch['user_news_features'], [-1, batch['user_news_features'].shape[2]]),
#       'user_news_features_next': tf.reshape(batch['user_news_features_next'], [-1, batch['user_news_features_next'].shape[2]]),
#       'context_features': tf.reshape(batch['context_features'], [-1, batch['context_features'].shape[2]]),
#   }

# def build_dataset_test(filepaths, n_candidates, batch_size_update):
#   dataset = tf.data.TFRecordDataset(filepaths, 'GZIP')
  
#   dataset = (
#       dataset
#       .map(parse_example)
#       .apply(
#           tf.data.experimental.group_by_window(
#               key_func=lambda e: e['event_id'],
#               reduce_func=lambda k, ds: ds.repeat(n_candidates).batch(n_candidates).take(1),
#               window_size=n_candidates,
#           )
#       )
#       .batch(batch_size_update)
#       .map(flatten_batch)
#       .prefetch(1)
#   )
  
#   return dataset

In [0]:
# def get_recommendation_indices(predictions, n_candidates, k_recommendations):
#   n_events = predictions.shape[0] // n_candidates
  
#   return np.reshape(
#       (np.argsort(np.reshape(predictions, (n_events, n_candidates)), axis=1)[:, -k_recommendations:]
#       + np.reshape(np.arange(n_events) * n_candidates, (n_events, 1))),
#       (n_events * k_recommendations, 1)
#   )


# def get_recommendation_data(batch, indices):
#   return {k: tf.gather_nd(v, indices) for k, v in batch.items()}

In [0]:
# Dataset details (hard-coded sizes of the training split)

n_samples_train = 12915691
# n_samples_test = 1796331

n_events_train = 1076343
# n_events_test = 149824

# Only referenced by the commented-out evaluation cells further down.
n_candidates = 12
k_recommendations = 5

# Fit parameters

batch_size = 1024
epochs = 1
# Number of batches needed to cover n_samples_train once; ceil() counts the
# final, possibly smaller, batch.
steps_per_epoch = int(np.ceil(n_samples_train / batch_size))

# Evaluate parameters

# batch_size_update = 200 # Update networks after every `batch_size_update` requests

In [0]:
filepaths = sorted(glob.glob(os.path.join(DATA_PATH, 'tfrecords', 'train', '*')))

dataset_train = build_dataset_train(filepaths, batch_size, epochs)

In [0]:
dataset_train_user_activeness = build_dataset_train_user_activeness(filepaths, batch_size, epochs)

In [0]:
# filepaths = sorted(glob.glob(os.path.join(DATA_PATH, 'tfrecords', 'test', '*')))

# dataset_test = build_dataset_test(filepaths, n_candidates, batch_size_update)

# Models

In [0]:
from tensorflow.keras.activations import relu, sigmoid
from tensorflow.keras.layers import Activation, Add, Concatenate, Dense, Dot, Input, Lambda, Subtract
from tensorflow.keras.losses import BinaryCrossentropy, MeanSquaredError
from tensorflow.keras.metrics import binary_accuracy
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.optimizers import Adam, RMSprop

In [0]:
# (name, shape) of every model input, in the order the build_* functions
# create their Input layers. The names match the keys of the `inputs` dict
# returned by parse_inputs_targets; the widths follow from the schema in
# `features`.
input_info = (
    ('news_features', (34,)),       # categoryVector (30) + log click counts (4)
    ('user_features', (124,)),      # userProfileVector (120) + log click counts (4)
    ('user_news_features', (30,)),  # categoryVector * userHistoryVector (30)
    ('context_features', (32,)),    # contextVector (32)
)

In [0]:
# def evaluate(eval_id, eval_click, eval_prediction, k_recommendations):
#   df = pd.DataFrame({
#       'id': eval_id.numpy(),
#       'click': eval_click.numpy(),
#       'prediction': eval_prediction.numpy(),
#   })
  
#   df['rank'] = df.groupby('id')['prediction'].rank(method='max', ascending=False)
  
#   patk = df[(df['click'] == 1) & (df['rank'] <= k_recommendations)].shape[0] / (df[(df['click'] == 1)].shape[0] * k_recommendations)
  
#   ndcg = df[(df['click'] == 1)]['rank'].map(lambda r: 1 / np.log2(r + 1) if r <= k_recommendations else 0).mean()
  
#   return patk, ndcg

## 1. Logistic Regression

In [0]:
def build_lr(input_info):
  """Builds and compiles a logistic-regression click classifier.

  All inputs are concatenated and fed to a single sigmoid unit.

  Args:
    input_info: iterable of `(name, shape)` pairs, one per model input.

  Returns:
    A compiled Keras model (RMSprop, binary cross-entropy, binary accuracy).
  """
  inputs = [Input(shape=shape, name=name) for name, shape in input_info]

  inputs_concat = Concatenate()(inputs)

  outputs = Dense(1, activation=sigmoid)(inputs_concat)

  model = Model(inputs=inputs, outputs=outputs)
  # RMSprop comes from the tensorflow.keras.optimizers import cell.
  model.compile(RMSprop(), loss=BinaryCrossentropy(), metrics=[binary_accuracy])

  return model

### Train

In [0]:
lr = build_lr(input_info)

In [0]:
lr.fit(dataset_train, batch_size=batch_size, epochs=epochs, steps_per_epoch=steps_per_epoch)

In [0]:
lr.save(os.path.join(DATA_PATH, 'model', 'lr_weights.h5'), overwrite=True, include_optimizer=False)

### Evaluate

In [0]:
# lr_test = build_lr(input_info)
# lr_test.load_weights(os.path.join(DATA_PATH, 'model', 'lr_weights.h5'))

In [0]:
# eval_id = tf.constant([], dtype=tf.int64)
# eval_click = tf.constant([], dtype=tf.int64)
# eval_prediction = tf.constant([], dtype=tf.float32)

# for batch in tqdm.tqdm(dataset_test, total=np.ceil(n_events_test / batch_size_update)):
#   predictions = lr_test.predict(batch, steps=1)
  
#   recommendation_indices = get_recommendation_indices(predictions, n_candidates, k_recommendations)
#   recommendation_data = get_recommendation_data(batch, recommendation_indices)
  
#   eval_id = tf.concat([eval_id, batch['event_id']], 0)
#   eval_click = tf.concat([eval_click, batch['click_label']], 0)
#   eval_prediction = tf.concat([eval_prediction, predictions.flatten()], 0)
  
#   batch_size = (predictions.shape[0] // n_candidates * k_recommendations)
  
  
#   lr_test.fit(recommendation_data, recommendation_data['click_label'], verbose=0, batch_size=batch_size, steps_per_epoch=1)

In [0]:
# evaluate(eval_id, eval_click, eval_prediction, k_recommendations)

## 2. Factorization Machines

In [0]:
def build_fm(input_info, k_latent):
  """Builds and compiles a factorization-machine style click classifier.

  The concatenated input is split into single-column tensors. Each column
  gets its own Dense(1) "bias" term and its own Dense(k_latent) factor
  vector. With s the sum of all factor vectors, the interaction term of
  column i is dot(s - v_i, v_i); summed over i this covers every ordered
  pair of distinct columns. Interaction and bias terms are added up and
  passed through a final sigmoid unit.

  Args:
    input_info: iterable of `(name, shape)` pairs, one per model input.
    k_latent: size of each column's factor vector.

  Returns:
    A compiled Keras model (Adam, binary cross-entropy, binary accuracy).
  """
  def column_slicer(index):
    # Factory so every Lambda layer owns its column index. A bare
    # `lambda x: x[:, i:i+1]` inside the comprehension would look `i` up when
    # the layer is called, so any later re-call of the layers would slice the
    # last column for all of them.
    return lambda x: x[:, index:index + 1]

  inputs = [Input(shape=shape, name=name) for name, shape in input_info]

  inputs_concat = Concatenate()(inputs)

  n_columns = inputs_concat.shape[1].value

  inputs_flat = [Lambda(column_slicer(i))(inputs_concat) for i in range(n_columns)]

  biases = [Dense(1)(x) for x in inputs_flat]

  factors = [Dense(k_latent)(x) for x in inputs_flat]

  s = Add()(factors)

  # s - v_i: the sum of every factor vector except column i's own.
  diffs = [Subtract()([s, x]) for x in factors]

  dots = [Dot(axes=1)([d, x]) for d, x in zip(diffs, factors)]

  outputs = Add()(dots + biases)
  outputs = Dense(1, activation=sigmoid)(outputs)

  model = Model(inputs=inputs, outputs=outputs)

  model.compile(Adam(), loss=BinaryCrossentropy(), metrics=[binary_accuracy])

  return model

### Train

In [0]:
fm = build_fm(input_info, k_latent=2)

In [0]:
fm.fit(dataset_train, batch_size=batch_size, epochs=epochs, steps_per_epoch=steps_per_epoch)

In [0]:
# save_weights() only writes layer weights and has no `include_optimizer`
# parameter (that belongs to Model.save); passing it raises a TypeError.
fm.save_weights(os.path.join(DATA_PATH, 'model', 'fm_weights.h5'), overwrite=True)

### Evaluate

In [0]:
# fm_test = build_fm(input_info, k_latent=2)
# fm_test.load_weights(os.path.join(DATA_PATH, 'model', 'fm_weights.h5'))

In [0]:
# eval_id = tf.constant([], dtype=tf.int64)
# eval_click = tf.constant([], dtype=tf.int64)
# eval_prediction = tf.constant([], dtype=tf.float32)

# for batch in tqdm.tqdm(dataset_test, total=np.ceil(n_events_test / batch_size_update)):
#   predictions = fm_test.predict(batch, steps=1)
  
#   recommendation_indices = get_recommendation_indices(predictions, n_candidates, k_recommendations)
#   recommendation_data = get_recommendation_data(batch, recommendation_indices)
  
#   eval_id = tf.concat([eval_id, batch['event_id']], 0)
#   eval_click = tf.concat([eval_click, batch['click_label']], 0)
#   eval_prediction = tf.concat([eval_prediction, predictions.flatten()], 0)
  
#   batch_size = (predictions.shape[0] // n_candidates * k_recommendations)
  
#   fm_test.fit(recommendation_data, recommendation_data['click_label'], verbose=0, batch_size=batch_size, steps_per_epoch=1)

In [0]:
# evaluate(eval_id, eval_click, eval_prediction, k_recommendations)

## 3. Wide & Deep

In [0]:
def build_wd(input_info):
  """Builds and compiles a Wide & Deep click classifier.

  The wide part is the raw concatenation of all inputs; the deep part is a
  256 -> 128 ReLU stack over the same concatenation. Both are concatenated
  and fed to a single sigmoid unit.

  Args:
    input_info: iterable of `(name, shape)` pairs, one per model input.

  Returns:
    A compiled Keras model (RMSprop, binary cross-entropy, binary accuracy).
  """
  inputs = [Input(shape=shape, name=name) for name, shape in input_info]

  inputs_concat = Concatenate()(inputs)

  # Second concatenation of the same inputs, used as the wide branch.
  wide = Concatenate()(inputs)

  deep = Dense(256, activation=relu)(inputs_concat)
  deep = Dense(128, activation=relu)(deep)

  wide_deep = Concatenate()([wide, deep])

  outputs = Dense(1, activation=sigmoid)(wide_deep)

  model = Model(inputs=inputs, outputs=outputs)

  # RMSprop comes from the tensorflow.keras.optimizers import cell.
  model.compile(RMSprop(), loss=BinaryCrossentropy(), metrics=[binary_accuracy])

  return model

### Train

In [0]:
wd = build_wd(input_info)

In [0]:
wd.fit(dataset_train, batch_size=batch_size, epochs=epochs, steps_per_epoch=steps_per_epoch)

In [0]:
wd.save(os.path.join(DATA_PATH, 'model', 'wd_weights.h5'), overwrite=True, include_optimizer=False)

### Evaluate

In [0]:
# wd_test = build_wd(input_info)
# wd_test.load_weights(os.path.join(DATA_PATH, 'model', 'wd_weights.h5'))

In [0]:
# eval_id = tf.constant([], dtype=tf.int64)
# eval_click = tf.constant([], dtype=tf.int64)
# eval_prediction = tf.constant([], dtype=tf.float32)

# for batch in tqdm.tqdm(dataset_test, total=np.ceil(n_events_test / batch_size_update)):
#   predictions = wd_test.predict(batch, steps=1)
  
#   recommendation_indices = get_recommendation_indices(predictions, n_candidates, k_recommendations)
#   recommendation_data = get_recommendation_data(batch, recommendation_indices)
  
#   eval_id = tf.concat([eval_id, batch['event_id']], 0)
#   eval_click = tf.concat([eval_click, batch['click_label']], 0)
#   eval_prediction = tf.concat([eval_prediction, predictions.flatten()], 0)
  
#   batch_size = (predictions.shape[0] // n_candidates * k_recommendations)
  
#   wd_test.fit(recommendation_data, recommendation_data['click_label'], verbose=0, batch_size=batch_size, steps_per_epoch=1)

In [0]:
# evaluate(eval_id, eval_click, eval_prediction, k_recommendations)

## 4. DQN without future reward

In [0]:
def build_dqn(input_info, state_indices):
  """Builds and compiles a two-branch (value / advantage) Q-network.

  The value branch only sees the inputs selected by `state_indices`; the
  advantage branch sees all inputs. Each branch is a 256 -> 128 ReLU stack
  ending in one linear unit, and a final linear unit combines the two.

  Args:
    input_info: iterable of `(name, shape)` pairs, one per model input.
    state_indices: positions in `input_info` of the inputs fed to the value
      branch.

  Returns:
    A compiled Keras model (Adam, mean squared error).
  """
  def scalar_head(tensor):
    # 256 -> 128 ReLU layers followed by a single linear output unit.
    for units in (256, 128):
      tensor = Dense(units, activation=relu)(tensor)
    return Dense(1)(tensor)

  input_layers = [Input(shape=shape, name=name) for name, shape in input_info]

  all_features = Concatenate()(input_layers)
  state_features = Concatenate()([input_layers[i] for i in state_indices])

  value = scalar_head(state_features)
  advantage = scalar_head(all_features)

  q_value = Dense(1)(Concatenate()([value, advantage]))

  model = Model(inputs=input_layers, outputs=q_value)
  model.compile(Adam(), loss=MeanSquaredError())

  return model

### Train

In [0]:
dqn = build_dqn(input_info, [1, 3])

In [0]:
dqn.fit(dataset_train, batch_size=batch_size, epochs=epochs, steps_per_epoch=steps_per_epoch)

In [0]:
# save_weights() only writes layer weights and has no `include_optimizer`
# parameter (that belongs to Model.save); passing it raises a TypeError.
dqn.save_weights(os.path.join(DATA_PATH, 'model', 'dqn_weights.h5'), overwrite=True)

In [0]:
dqnu = build_dqn(input_info, [1, 3])

In [0]:
# Train the user-activeness variant on the dataset whose targets include the
# user-activeness term; `dataset_train` carries the plain click labels that
# `dqn` was already fitted on.
dqnu.fit(dataset_train_user_activeness, batch_size=batch_size, epochs=epochs, steps_per_epoch=steps_per_epoch)

In [0]:
# save_weights() only writes layer weights and has no `include_optimizer`
# parameter (that belongs to Model.save); passing it raises a TypeError.
dqnu.save_weights(os.path.join(DATA_PATH, 'model', 'dqnu_weights.h5'), overwrite=True)

### Evaluate

In [0]:
# dqn_test = build_dqn(input_info, [1, 3])
# dqn_test.load_weights(os.path.join(DATA_PATH, 'model', 'dqn_weights.h5'))

In [0]:
# eval_id = tf.constant([], dtype=tf.int64)
# eval_click = tf.constant([], dtype=tf.int64)
# eval_prediction = tf.constant([], dtype=tf.float32)

# for batch in tqdm.tqdm(dataset_test, total=np.ceil(n_events_test / batch_size_update)):
#   predictions = dqn_test.predict(batch, steps=1)
  
#   recommendation_indices = get_recommendation_indices(predictions, n_candidates, k_recommendations)
#   recommendation_data = get_recommendation_data(batch, recommendation_indices)
  
#   eval_id = tf.concat([eval_id, batch['event_id']], 0)
#   eval_click = tf.concat([eval_click, batch['click_label']], 0)
#   eval_prediction = tf.concat([eval_prediction, predictions.flatten()], 0)
  
#   batch_size = (predictions.shape[0] // n_candidates * k_recommendations)
  
#   dqn_test.fit(recommendation_data, recommendation_data['click_label'], verbose=0, batch_size=batch_size, steps_per_epoch=1)

In [0]:
# evaluate(eval_id, eval_click, eval_prediction, k_recommendations)

## 5. DDQN

In [0]:
# class DDQN(object):
#   def __init__(
#       self, input_info, state_indices, weights_path,
#       target_update_steps, major_update_steps,
#       future_reward_discount, user_activeness_coef,
#       epsilon, explore_coef, exploit_coef,
#   ):
    
#     self.dqn = self._build_dqn(input_info, state_indices)
#     self.dqn.load_weights(weights_path)
    
#     self.target_dqn = self._build_dqn(input_info, state_indices)
#     self.target_dqn.load_weights(weights_path)
    
#     self.explore_dqn = self._build_dqn(input_info, state_indices)
#     self.explore_dqn.load_weights(weights_path)
    
#     self.target_update_steps = target_update_steps
#     self.major_update_steps = major_update_steps
    
#     self.future_reward_discount = tf.constant(future_reward_discount, tf.float32)
#     self.user_activeness_coef = tf.constant(user_activeness_coef, tf.float32)
    
#     self.epsilon = epsilon
#     self.explore_coef = explore_coef
#     self.exploit_coef = exploit_coef
    
#     self.memory = []
    
#   def _build_dqn(self, input_info, state_indices):
#     inputs = [Input(shape=shape, name=name) for name, shape in input_info]

#     inputs_concat = Concatenate()(inputs)

#     value = Concatenate()([inputs[i] for i in state_indices])
#     value = Dense(256, activation=relu)(value)
#     value = Dense(128, activation=relu)(value)
#     value = Dense(1)(value)

#     advantage = Dense(256, activation=relu)(inputs_concat)
#     advantage = Dense(128, activation=relu)(advantage)
#     advantage = Dense(1)(advantage)

#     value_advantage = Concatenate()([value, advantage])

#     outputs = Dense(1)(value_advantage)

#     model = Model(inputs=inputs, outputs=outputs)

#     model.compile(Adam(), loss=MeanSquaredError())

#     return model
  
#   def _expand_news_data(self, ts, n, k):
#     return tf.reshape(tf.broadcast_to(tf.reshape(ts, [n, k, ts.shape[1]]), [n, k*k, ts.shape[1]]), [n*k*k, ts.shape[1]])
  
#   def _expand_user_data(self, ts, n, k):
#     return tf.reshape(tf.broadcast_to(ts, [n*k, ts.shape[1]*k]), [n*k*k, ts.shape[1]])
  
#   def _update_target_dqn(self):
#     self.target_dqn.set_weights(self.dqn.get_weights())
    
#   def _interleave(self, list0, list1):
#     lists = [list0, list1]

#     interleaved = []

#     for i in np.random.randint(2, size=len(list0)):
#       item = lists[i].pop()
#       lists[1-i].remove(item)
#       interleaved.append(item)

#     return interleaved
  
#   def add_future_rewards_user_activeness(self, batch, k):
#     n = batch['click_label'].shape[0].value // k
    
#     inputs = {
#         'news_features': self._expand_news_data(batch['news_features'], n, k),
#         'user_features': self._expand_user_data(batch['user_features_next'], n, k),
#         'user_news_features': self._expand_news_data(batch['user_news_features_next'], n, k),
#         'context_features': self._expand_news_data(batch['context_features'], n, k),
#     }

#     target_actions = self.target_dqn.predict(inputs, steps=1).reshape([-1, k]).argmax(1)
#     target_actions = np.reshape(np.reshape(np.arange(n) * k, [-1, 1]) + target_actions.reshape([n, k]), [-1, 1])

#     inputs = {
#         'news_features': tf.gather_nd(batch['news_features'], target_actions),
#         'user_features': batch['user_features_next'],
#         'user_news_features': tf.gather_nd(batch['user_news_features_next'], target_actions),
#         'context_features': tf.gather_nd(batch['context_features'], target_actions),
#     }

#     future_rewards = self.dqn.predict(inputs, steps=1).flatten()

#     rewards = (
#         tf.dtypes.cast(batch['click_label'], tf.float32)
#         + self.future_reward_discount * tf.convert_to_tensor(future_rewards, tf.float32)
#         + self.user_activeness_coef * batch['user_activeness']
#     )
    
#     batch['reward'] = rewards
    
#     return batch
  
#   def predict(self, batch):
#     return self.dqn.predict(batch, steps=1)
  
#   def predict_eg(self, batch):
#     batch_size = batch['click_label'].shape[0].value
    
#     if np.random.rand() < self.epsilon:
#       return np.random.rand(batch_size, 1)
#     else:
#       return self.predict(batch)
    
#   def predict_dbgd(self, batch):
#     self.explore_dqn.set_weights([np.random.uniform(-1, 1) * self.explore_coef * w for w in self.dqn.get_weights()])
      
#     indices_0 = self.dqn.predict(batch, steps=1).flatten().argsort().tolist()
#     indices_1 = self.explore_dqn.predict(batch, steps=1).flatten().argsort().tolist()

#     indices = self._interleave(indices_0, indices_1)

#     predictions = np.zeros(len(indices))
#     predictions[indices] = 1. / np.arange(1, len(indices) + 1)

#     return predictions
    
#   def fit_batch(self, batch, k_recommendations):
#     batch = tf.data.Dataset.from_tensor_slices(batch).batch(self.target_update_steps * k_recommendations)
    
#     for mini_batch in batch:
#       batch_size = mini_batch['reward'].shape[0].value
#       self.dqn.fit(mini_batch, mini_batch['reward'], verbose=0, batch_size=batch_size, steps_per_epoch=1)
#       self._update_target_dqn()
      
#   def minor_update(self, batch, k_recommendations):
#     self._update_explore_dqn(batch, k_recommendations)
    
#     self.memory.append(batch)
    
#     if len(self.memory) >= self.major_update_steps:
#       self._major_update(k_recommendations)
      
#       self.memory = []
      
#   def _major_update(self, k_recommendations):
#     batch_size = self.target_update_steps * k_recommendations
    
#     memory = tf.data.Dataset.from_tensor_slices(self.memory[0])
#     for batch in self.memory[1:]:
#       memory = memory.concatenate(tf.data.Dataset.from_tensor_slices(batch))
#     memory = memory.batch(batch_size)
    
#     for batch in memory:
#       self.dqn.fit(batch, batch['reward'], verbose=0, batch_size=batch_size, steps_per_epoch=1)
#       self._update_target_dqn()
      
#   def _update_explore_dqn(self, batch, k_recommendations):
#     predictions_0 = self.dqn.predict(batch, steps=1)
#     predictions_1 = self.explore_dqn.predict(batch, steps=1)
    
#     click_labels = batch['click_label']
    
#     ndcg_0 = self._calculate_ndcg(predictions_0, click_labels, k_recommendations)
#     ndcg_1 = self._calculate_ndcg(predictions_1, click_labels, k_recommendations)
    
#     if ndcg_0 < ndcg_1:
#       self.dqn.set_weights([w0 + self.exploit_coef * w1 for (w0, w1) in zip(self.dqn.get_weights(), self.explore_dqn.get_weights())])
      
#   def _calculate_ndcg(self, predictions, click_labels, k_recommendations):
#     # Convert tensor to numpy array
#     click_labels = click_labels.numpy().flatten()
    
#     n_events = click_labels.shape[0] // k_recommendations
    
#     indices = (
#         np.argsort(np.reshape(predictions, [n_events, k_recommendations]), 1)
#         + np.reshape(np.arange(n_events) * k_recommendations, [-1, 1])
#     )
    
#     indices = indices.flatten()
    
#     discount = np.zeros(indices.shape)
#     discount[indices] = np.tile(np.arange(2, k_recommendations + 2), n_events)
    
#     ndcg = click_labels / np.log2(discount)
    
#     return ndcg.mean()

### DDQN

In [0]:
# weights_path = os.path.join(DATA_PATH, 'model', 'dqn_weights.h5')

# ddqn = DDQN(
#     input_info, [1, 3], weights_path,
#     target_update_steps=50, major_update_steps=2,
#     future_reward_discount=0.1, user_activeness_coef=0,
#     epsilon=0, explore_coef=0, exploit_coef=0,
# )

In [0]:
# eval_id = tf.constant([], dtype=tf.int64)
# eval_click = tf.constant([], dtype=tf.int64)
# eval_prediction = tf.constant([], dtype=tf.float32)

# for batch in tqdm.tqdm(dataset_test, total=np.ceil(n_events_test / batch_size_update)):
#   batch = ddqn.add_future_rewards_user_activeness(batch, n_candidates)
  
#   predictions = ddqn.predict(batch)
  
#   recommendation_indices = get_recommendation_indices(predictions, n_candidates, k_recommendations)
#   recommendation_data = get_recommendation_data(batch, recommendation_indices)
  
#   eval_id = tf.concat([eval_id, batch['event_id']], 0)
#   eval_click = tf.concat([eval_click, batch['click_label']], 0)
#   eval_prediction = tf.concat([eval_prediction, predictions.flatten()], 0)
  
#   ddqn.fit_batch(recommendation_data, k_recommendations)

In [0]:
# evaluate(eval_id, eval_click, eval_prediction, k_recommendations)

### DDQN + U

In [0]:
# weights_path = os.path.join(DATA_PATH, 'model', 'dqnu_weights.h5')

# ddqn = DDQN(
#     input_info, [1, 3], weights_path,
#     target_update_steps=50, major_update_steps=2,
#     future_reward_discount=0.1, user_activeness_coef=0.05,
#     epsilon=0, explore_coef=0, exploit_coef=0,
# )

In [0]:
# eval_id = tf.constant([], dtype=tf.int64)
# eval_click = tf.constant([], dtype=tf.int64)
# eval_prediction = tf.constant([], dtype=tf.float32)

# for batch in tqdm.tqdm(dataset_test, total=np.ceil(n_events_test / batch_size_update)):
#   batch = ddqn.add_future_rewards_user_activeness(batch, n_candidates)
  
#   predictions = ddqn.predict(batch)
  
#   recommendation_indices = get_recommendation_indices(predictions, n_candidates, k_recommendations)
#   recommendation_data = get_recommendation_data(batch, recommendation_indices)
  
#   eval_id = tf.concat([eval_id, batch['event_id']], 0)
#   eval_click = tf.concat([eval_click, batch['click_label']], 0)
#   eval_prediction = tf.concat([eval_prediction, predictions.flatten()], 0)
  
#   ddqn.fit_batch(recommendation_data, k_recommendations)

In [0]:
# evaluate(eval_id, eval_click, eval_prediction, k_recommendations)

### DDQN + U + EG

In [0]:
# weights_path = os.path.join(DATA_PATH, 'model', 'dqnu_weights.h5')

# ddqn = DDQN(
#     input_info, [1, 3], weights_path,
#     target_update_steps=50, major_update_steps=2,
#     future_reward_discount=0.1, user_activeness_coef=0.05,
#     epsilon=0.05, explore_coef=0, exploit_coef=0,
# )

In [0]:
# eval_id = tf.constant([], dtype=tf.int64)
# eval_click = tf.constant([], dtype=tf.int64)
# eval_prediction = tf.constant([], dtype=tf.float32)

# for batch in tqdm.tqdm(dataset_test, total=np.ceil(n_events_test / batch_size_update)):
#   batch = ddqn.add_future_rewards_user_activeness(batch, n_candidates)
  
#   predictions = ddqn.predict_eg(batch)
  
#   recommendation_indices = get_recommendation_indices(predictions, n_candidates, k_recommendations)
#   recommendation_data = get_recommendation_data(batch, recommendation_indices)
  
#   eval_id = tf.concat([eval_id, batch['event_id']], 0)
#   eval_click = tf.concat([eval_click, batch['click_label']], 0)
#   eval_prediction = tf.concat([eval_prediction, predictions.flatten()], 0)
  
#   ddqn.fit_batch(recommendation_data, k_recommendations)

In [0]:
# evaluate(eval_id, eval_click, eval_prediction, k_recommendations)

### DDQN + U + DBGD

In [0]:
# weights_path = os.path.join(DATA_PATH, 'model', 'dqnu_weights.h5')

# ddqn = DDQN(
#     input_info, [1, 3], weights_path,
#     target_update_steps=50, major_update_steps=2,
#     future_reward_discount=0.1, user_activeness_coef=0.05,
#     epsilon=0, explore_coef=0.1, exploit_coef=0.05,
# )

In [0]:
# eval_id = tf.constant([], dtype=tf.int64)
# eval_click = tf.constant([], dtype=tf.int64)
# eval_prediction = tf.constant([], dtype=tf.float32)

# for batch in tqdm.tqdm(dataset_test, total=np.ceil(n_events_test / batch_size_update)):
#   batch = ddqn.add_future_rewards_user_activeness(batch, n_candidates)
  
#   predictions = ddqn.predict_dbgd(batch)
  
#   recommendation_indices = get_recommendation_indices(predictions, n_candidates, k_recommendations)
#   recommendation_data = get_recommendation_data(batch, recommendation_indices)
  
#   eval_id = tf.concat([eval_id, batch['event_id']], 0)
#   eval_click = tf.concat([eval_click, batch['click_label']], 0)
#   eval_prediction = tf.concat([eval_prediction, predictions.flatten()], 0)
  
#   ddqn.minor_update(recommendation_data, k_recommendations)

In [0]:
# evaluate(eval_id, eval_click, eval_prediction, k_recommendations)