In [1]:
from google.colab import drive
drive.mount('/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /drive


### Load Libraries 

In [2]:
import pandas as pd 
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from random import choice
from collections import defaultdict

### Load Training / Validation Datasets

In [3]:
# Load Data
base_dir  = '/drive/My Drive/DSC672_Project/datasets/CASER_DATA/train/'
train_ds  = pd.read_csv(
    base_dir + 'train.csv',header=None,
    index_col=0,names=['uid','iid','rui','date']
).drop(['date','rui'],axis=1)
val_ds   = pd.read_csv(
    base_dir + 'validation.csv',header=None,
    index_col=0,names=['uid','iid','rui','date']
).drop(['date','rui'],axis=1)

# Create User and Item Mappings to Integers
user_encoder = dict((uid,i) for i,uid in enumerate(train_ds.uid.unique(),start=1))
user_encoder['unknown'] = 0
user_decoder = dict((v,k) for k,v in user_encoder.items())
item_encoder = dict((iid,i) for i,iid in enumerate(train_ds.iid.unique(),start=1))
item_encoder['unknown'] = 0
item_decoder = dict((v,k) for k,v in item_encoder.items())

# Create Set of all encoded Item ids
item_set = set(item_encoder.values())

# Convert Raw id dataframes to encoded id dataframes
uid = train_ds.uid.apply(lambda x: user_encoder[x])
iid = train_ds.iid.apply(lambda x: item_encoder[x])

train_ds = pd.DataFrame({'uid':uid,'iid':iid})

uid = val_ds.uid.apply(lambda x: user_encoder[x] if x in user_encoder else user_encoder['unknown'])
iid = val_ds.iid.apply(lambda x: item_encoder[x] if x in item_encoder else item_encoder['unknown'])

val_ds = pd.DataFrame({'uid':uid,'iid':iid})

# Aggregate to sequences of climbed routes
# and generate sets of all routes climbed by user
train_ds_seq = train_ds.groupby('uid').agg([list,set])
val_ds_seq = val_ds.groupby('uid').agg([list,set])

# For training data generate length of sequence for 
# creating training sequences 
train_ds_seq['sequence_len'] = train_ds_seq['iid','list'].apply(len)

# For validation data create ragged tensor
# of routes climbed after training data
val_items = tf.ragged.constant(val_ds_seq['iid','list'])

### Function to Create Training input tensors

In [4]:
def to_sequences(df,L=5,T=3,n_ratio=3):
  total_len = L + T 
  n_samples = sum([c - total_len + 1 if c >= total_len else 1 for c in df['sequence_len']])
  
  n_users = df.shape[0]
  test_uid = np.empty((n_users,1),np.int32)
  test_sequence = np.empty((n_users,L),np.int32)

  uid_input = np.empty((n_samples,1),np.int32)
  seq_input = np.empty((n_samples,L),np.int32)
  pos_targets = np.empty((n_samples,T),np.int32)
  neg_targets = np.empty((n_samples,T*n_ratio),np.int32)
  
  train_idx = 0
  test_idx = 0
  for breakpoint, (uid, row) in enumerate(df.iterrows()):
    
    seq, set_, _ = row
    seq = np.array(seq)
    size = len(seq)
    if size < total_len:
      pad_length = total_len - size
      seq = np.pad(seq,(pad_length,0),constant_values=0)
      size = total_len 


    test_uid[test_idx] = uid
    test_sequence[test_idx] = seq[-L:] 

    neg_set = np.array([i for i in (item_set - set_)])
    for i in range(size - L - T + 1):
      uid_input[train_idx] = uid
      seq_input[train_idx] = np.array(seq[i:i+L],np.int32)
      pos_targets[train_idx] = np.array(seq[i+L:i+L+T],np.int32)
      neg_targets[train_idx] = np.random.choice(neg_set,T*n_ratio,replace=False)
      train_idx += 1
    test_idx += 1
  
  return {'training':(uid_input,seq_input,pos_targets,neg_targets),'test':(test_uid,test_sequence)}

### Functions to assess model performance

In [5]:
def top_k_iids(tf_model,uid,iids,k=25):
  # Expand to 2d Tensors
  uid = tf.expand_dims(uid,0)
  iids = tf.expand_dims(iids,0)
  # Predict ranking for all items 
  predictions = tf_model.predict_all_iids(uid,iids)
  # Reduce Prediction to 1d Tensor
  predictions = tf.squeeze(predictions)
  # Get Top-k Items 
  top_k = tf.argsort(predictions,direction='DESCENDING')[:k]
  return top_k 

def precision_and_recall(tf_model,uid,seq_iids,next_iids,k_vals=[1,5,10,25]):
  max_k = max(k_vals)
  top_max_k = top_k_iids(tf_model,uid,seq_iids,max_k).numpy()

  precision_at_k = dict()
  recall_at_k = dict()

  relevant = set(next_iids.numpy())

  for k in k_vals:

    top_k = set(top_max_k[:k])

    rel_and_rec = top_k & relevant

    precision_at_k[k] = len(rel_and_rec) / k
    recall_at_k[k] = len(rel_and_rec) / len(relevant)
  return (precision_at_k,recall_at_k)

def validation_metric(tf_model,tf_dataset,k_vals=[1,5,10,25]):
  precision_at_k = defaultdict(list)
  recall_at_k = defaultdict(list)

  mean_precision = defaultdict(float)
  mean_recall = defaultdict(float)

  for obs in tf_dataset:
    uid, seq_iids, next_iids = obs
    precision, recall = precision_and_recall(tf_model,uid,seq_iids,next_iids,k_vals)
    for key in precision.keys():
      precision_at_k[key].append(precision[key])
      recall_at_k[key].append(recall[key])
  
  for key, val in precision_at_k.items():
    mean_precision[key] = np.array(val).mean()
  for key, val in recall_at_k.items():
    mean_recall[key] = np.array(val).mean()

  return mean_precision, mean_recall

### Caser Model 

In [6]:
class TFCaser(tf.Module):
  def __init__(self,n_uid,n_iid,L,
               embedding=8,n_v=16,n_h=16,
               dropout_rate=0.5,l2_lambda=0.001,
               conv_act=tf.nn.relu,
               dense_act=tf.nn.relu,
               initializer = tf.initializers.GlorotNormal
    ):
    super(TFCaser,self).__init__(name='Caser')
    # Item and User Embedding
    self.n_uid = n_uid
    self.n_iid = n_iid
    self.embedding = embedding
    self.initalizer = initializer()


    self.U = tf.Variable(
        self.initalizer(shape=(self.n_uid,self.embedding))
    )
    self.I = tf.Variable(
        self.initalizer((self.n_iid,self.embedding))
    )

    # Sequence Length
    self.L = tf.Variable(tf.constant(L),trainable=False)
    self.int_L = L

    # Vertical and Horizontal Convolutions
    self.n_v = n_v
    self.v_conv = tf.Variable(self.initalizer((self.L,1,1,self.n_v)))
    self.v_conv_bias = tf.Variable(tf.zeros(self.n_v))
    self.n_h = n_h
    self.lengths = [i for i in range(1,L+1)]
    self.h_conv  = [
        tf.Variable(self.initalizer((l,self.embedding,1,self.n_h)))
        for l in self.lengths
    ]
    self.h_conv_bias = [
        tf.Variable(tf.zeros(self.n_h))
        for _ in self.lengths
    ]
    # Convolution Activation
    self.conv_act = conv_act

    # Dense Layer
    ## Input Size
    self.dense_v = self.embedding * self.n_v
    self.dense_h = self.n_h * len(self.lengths)
    self.dense_in = self.dense_v + self.dense_h
    ## Activation
    self.dense_act = dense_act
    ## Weights and Biases
    self.W_1 = tf.Variable(self.initalizer((self.dense_in,self.embedding)))
    self.B_1 = tf.Variable(tf.zeros(self.embedding))

    # Output Layer
    self.W_2 = tf.Variable(self.initalizer((self.n_iid,self.embedding * 2)))
    self.B_2 = tf.Variable(tf.zeros(self.n_iid))

    # Regularization
    self.dropout_rate = dropout_rate
    self.l2_lambda = l2_lambda

    # Weight Tensors for L2 Regularization
    self.weights = [
        self.U,self.I,
        self.W_1,self.W_2,
        self.v_conv
    ] + self.h_conv



  def forward_to_last(self,uid,iids,training):
    u_emb = tf.squeeze(tf.nn.embedding_lookup(self.U,uid),1)

    s_emb = tf.nn.embedding_lookup(self.I,iids)
    s_emb = tf.expand_dims(s_emb,-1)

    # Vertical Convolutions
    v = self.conv_act(tf.nn.conv2d(s_emb,self.v_conv,1,'VALID'))
    v = tf.nn.bias_add(v,self.v_conv_bias)
    v = tf.reshape(v,(-1,self.dense_v))

    # Horizontal Convolutions
    out_hs = tf.TensorArray(tf.float32,self.L)

    for idx, (conv, conv_bias) in enumerate(zip(self.h_conv,self.h_conv_bias)):
      h = self.conv_act(tf.nn.conv2d(s_emb,conv,1,'VALID'))
      h = tf.nn.bias_add(h,conv_bias)
      h = tf.squeeze(h,2)
      h = tf.nn.max_pool1d(h,h.shape[1],1,'VALID')
      out_hs = out_hs.write(idx,tf.transpose(h))
    h = out_hs.concat()
    h = tf.transpose(h)
    h = tf.squeeze(h,1)

    # Concat Vertical and Horizontal Convolutional Outputs
    dense_in = tf.concat([v,h],1)
    # Apply Dense Layer
    dense_out = tf.add(tf.matmul(dense_in,self.W_1),self.B_1)
    dense_out = self.dense_act(dense_out)

    dense_out = tf.cond(
        training,
        lambda: tf.nn.dropout(dense_out,self.dropout_rate),
        lambda: dense_out
    )
    #if training:
      # If training model apply dropout
      #dense_out = tf.nn.dropout(dense_out,self.dropout_rate)
    
    # Concat user embedding with dense layer
    x = tf.concat([u_emb,dense_out],1)
    return x


  def predict_with_iids(self,uid,seq_iids,target_iids,training):
    x = self.forward_to_last(uid,seq_iids,training)
    w2 = tf.nn.embedding_lookup(self.W_2,target_iids)
    b2 = tf.nn.embedding_lookup(self.B_2,target_iids)
    x = tf.expand_dims(x,1)
    res = tf.reduce_sum(x*w2,-1) + b2
    return res


  def predict_all_iids(self,uid,seq_iids):
    x = self.forward_to_last(uid,seq_iids,tf.constant(False))
    res = tf.add(tf.matmul(x,self.W_2,transpose_b=True),self.B_2)
    return res

  def build(self):
    # Build TF Functions
    self.forward_to_last = tf.function(
      self.forward_to_last,
      input_signature=[
          tf.TensorSpec((None,1),tf.int32,name='User ID'),
          tf.TensorSpec((None,self.L.numpy()),tf.int32,name='Item Sequence'),
          tf.TensorSpec((),tf.bool,name='Training Switch')
      ]
    )

    self.predict_with_iids = tf.function(
        self.predict_with_iids,
        input_signature=[
          tf.TensorSpec((None,1),tf.int32,name='User ID'),
          tf.TensorSpec((None,self.L.numpy()),tf.int32,name='Item Sequence'),
          tf.TensorSpec((None,None),tf.int32,name='Prediction Items'),
          tf.TensorSpec((),tf.bool,name='Training Switch')
      ]
    )

    self.predict_all_iids = tf.function(
        self.predict_all_iids,
        input_signature=[
          tf.TensorSpec((None,1),tf.int32,name='User ID'),
          tf.TensorSpec((None,self.L.numpy()),tf.int32,name='Item Sequence')
      ]

    )

### Caser Training Function

In [7]:
def train_caser(caser_model,training_dataset,
                n_epochs,optimizer,batch_size,
                steps_per_epoch,
                validation_dataset=None,
                val_monitor=None):
  if val_monitor == None:
    val_monitor = n_epochs

  training_dataset = training_dataset.shuffle(1000)
  training_dataset = training_dataset.batch(batch_size=batch_size)


  history = {'train_loss':[],'precision':[],'recall':[]}
  for e in range(n_epochs):
    epoch_loss = 0
    for bdx, batch in enumerate(training_dataset,start=1):
      uid, seq, y_poss, y_negs = batch
      
      with tf.GradientTape() as tape:
        pos_pred = caser_model.predict_with_iids(uid, seq, y_poss,training=tf.constant(True))
        neg_pred = caser_model.predict_with_iids(uid, seq, y_negs,training=tf.constant(True))
        pos_loss = - tf.reduce_mean(tf.math.log(tf.sigmoid(pos_pred + 1e-10)))
        neg_loss = - tf.reduce_mean(tf.math.log((1-tf.sigmoid(neg_pred))+1e-10))
        total_loss = pos_loss + neg_loss
        l2 = sum([tf.nn.l2_loss(t).numpy() for t in caser_model.weights])
        reg_loss = (l2 * caser_model.l2_lambda) + total_loss
      params= caser_model.trainable_variables
      grads = tape.gradient(reg_loss,params)
      optimizer.apply_gradients(zip(grads,params))
      epoch_loss = epoch_loss + total_loss
      if bdx == steps_per_epoch: break

    loss = epoch_loss/(bdx) 
    history['train_loss'].append(loss.numpy())
    print(f'Epoch {e} train loss: {loss.numpy():0.4f}')

    if (e + 1) % val_monitor == 0 and validation_dataset:
      precision, recall = validation_metric(caser_model,validation_dataset)
      history['recall'].append(recall)
      history['precision'].append(precision)
  
  return history

### Random Search

Building Training Sequences is slow so for sets of L and T perform random search for each set

#### Caser L - T


In [8]:
# Set Up Experiment Search Space

LOG_FILE = '/drive/My Drive/DSC672_Project/model_results/CASER/random_search.csv'

N_USERS = len(user_encoder)
N_ITEMS = len(item_encoder)

SEQ_LENGTH = 9
T = 1
L2_REG = 1e-4
N_EPOCHS = 10
OPTIMIZER = tf.optimizers.Adam(1e-3)
BATCH_SIZE = 256

EMBEDDING_DIMS = [2**i for i in range(2,10)]
NUM_FILTERS = [2**i for i in range(1,7)]
ACTIVATIONS = {
    'ReLU':tf.nn.relu,
    'Sigmoid':tf.nn.sigmoid,
    'Tanh':tf.nn.tanh,
    'Linear':tf.keras.activations.linear
}


NUMBER_OF_EXPERIMENTS = 3


seq_data = to_sequences(train_ds_seq,SEQ_LENGTH,T,n_ratio=5)
uids,iids,y_pos,y_neg = seq_data['training']
val_uids,val_seq = seq_data['test']
val_uids = val_uids[val_ds_seq.index.to_numpy() - 1]
val_seq = val_seq[val_ds_seq.index.to_numpy() - 1]

training_dataset = tf.data.Dataset.from_tensor_slices((uids,iids,y_pos,y_neg))
validation_dataset = tf.data.Dataset.from_tensor_slices((val_uids,val_seq,val_items))

In [None]:
# Random Search Loop
for _ in range(NUMBER_OF_EXPERIMENTS):
  emb_dim = choice(EMBEDDING_DIMS)
  v_filters = choice(NUM_FILTERS)
  h_filters = choice(NUM_FILTERS)

  conv_act_name, conv_act = choice(list(ACTIVATIONS.items()))
  dense_act_name, dense_act = choice(list(ACTIVATIONS.items()))

  # Model Hyper Parameters
  model_hyper_params = [
    SEQ_LENGTH,T,
    emb_dim,v_filters,h_filters,
    conv_act_name,dense_act_name  
  ]

  # Create Model
  model = TFCaser(
      N_USERS, N_ITEMS, L=SEQ_LENGTH,
      embedding=emb_dim,n_v=v_filters,n_h=h_filters,l2_lambda=L2_REG,
      conv_act = conv_act, dense_act=dense_act
  )

  model.build()

  # Train Model
  model_history = train_caser(
      model,training_dataset,
      N_EPOCHS,OPTIMIZER,BATCH_SIZE,
      uids.shape[0] // BATCH_SIZE,
      validation_dataset
  )

  # Validation Metrics
  precision = list(model_history['precision'][-1].values())
  recall = list(model_history['recall'][-1].values())

  # Append Results and Hyperparameters to Logging File
  log_line = model_hyper_params + precision + recall 
  log_line = ','.join(map(str,log_line))
  with open(LOG_FILE,'a') as f:
    f.write(log_line + '\n')

Epoch 0 train loss: 1.0624
Epoch 1 train loss: 0.8451
Epoch 2 train loss: 0.6878
Epoch 3 train loss: 0.5802
Epoch 4 train loss: 0.5094
Epoch 5 train loss: 0.4578
Epoch 6 train loss: 0.4175
Epoch 7 train loss: 0.3842
Epoch 8 train loss: 0.3558
Epoch 9 train loss: 0.3316
Epoch 0 train loss: 1.2185
Epoch 1 train loss: 1.0772
Epoch 2 train loss: 0.9892
Epoch 3 train loss: 0.9190
Epoch 4 train loss: 0.8635
Epoch 5 train loss: 0.8176
Epoch 6 train loss: 0.7791
Epoch 7 train loss: 0.7455
Epoch 8 train loss: 0.7169
Epoch 9 train loss: 0.6921
Epoch 0 train loss: 1.1565
Epoch 1 train loss: 1.0134
Epoch 2 train loss: 0.9242
Epoch 3 train loss: 0.8528
Epoch 4 train loss: 0.7993
Epoch 5 train loss: 0.7577
Epoch 6 train loss: 0.7252
Epoch 7 train loss: 0.6978
Epoch 8 train loss: 0.6741
Epoch 9 train loss: 0.6528
Epoch 0 train loss: 1.5471
Epoch 1 train loss: 0.8600
Epoch 2 train loss: 0.5756
