In [None]:
import numpy as np

import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras import layers, regularizers


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Data Loader Function

In [None]:
def load_data_100k(path='./', delimiter='\t'):

    train = np.loadtxt(path+'movielens_100k_u1.base', skiprows=0, delimiter=delimiter).astype('int32')
    test = np.loadtxt(path+'movielens_100k_u1.test', skiprows=0, delimiter=delimiter).astype('int32')
    total = np.concatenate((train, test), axis=0)

    n_u = np.unique(total[:,0]).size  # num of users
    n_m = np.unique(total[:,1]).size  # num of movies
    n_train = train.shape[0]  # num of training ratings
    n_test = test.shape[0]  # num of test ratings

    train_r = np.zeros((n_m, n_u), dtype='float32')
    test_r = np.zeros((n_m, n_u), dtype='float32')

    for i in range(n_train):
        train_r[train[i,1]-1, train[i,0]-1] = train[i,2]

    for i in range(n_test):
        test_r[test[i,1]-1, test[i,0]-1] = test[i,2]

    train_m = np.greater(train_r, 1e-12).astype('float32')  # masks indicating non-zero entries
    test_m = np.greater(test_r, 1e-12).astype('float32')

    print('data matrix loaded')
    print('num of users: {}'.format(n_u))
    print('num of movies: {}'.format(n_m))
    print('num of training ratings: {}'.format(n_train))
    print('num of test ratings: {}'.format(n_test))

    return n_m, n_u, train_r, train_m, test_r, test_m

# Load Data

In [None]:
path='/content/drive/MyDrive/Colab Notebooks/MovieLens_100K/'
n_m, n_u, train_r, train_m, test_r, test_m = load_data_100k(path=path, delimiter='\t')

data matrix loaded
num of users: 943
num of movies: 1682
num of training ratings: 80000
num of test ratings: 20000


In [None]:
R = tf.Variable(tf.zeros([n_m, n_u], dtype=tf.float32))

# Hyperparameter Settings

In [None]:
n_hid = 500
n_dim = 5
n_layers = 2
gk_size = 3

In [None]:
lambda_2 = 20.  # l2 regularisation
lambda_s = 0.006
iter_p = 5  # optimisation
iter_f = 5
epoch_p = 30  # training epoch
epoch_f = 60
dot_scale = 1
learning_rate=0.001

# Network Function

In [None]:

class KernelLayer(Layer):
    def __init__(self, n_hid, n_dim, activation=tf.keras.activations.sigmoid, lambda_s=lambda_s, lambda_2=lambda_2, **kwargs):
        super(KernelLayer, self).__init__(**kwargs)
        self.n_hid = n_hid
        self.n_dim = n_dim
        self.activation = activation
        self.lambda_s = lambda_s
        self.lambda_2 = lambda_2

    def build(self, input_shape):
        n_in = input_shape[1]
        self.W = self.add_weight(name='W', shape=(n_in, self.n_hid), initializer='random_normal', trainable=True)
        self.u = self.add_weight(name='u', shape=(n_in, 1, self.n_dim), initializer='truncated_normal', trainable=True)
        self.v = self.add_weight(name='v', shape=(1, self.n_hid, self.n_dim), initializer='truncated_normal', trainable=True)
        self.b = self.add_weight(name='b', shape=(self.n_hid,), initializer='zeros', trainable=True)

    def call(self, x):
        w_hat = self.local_kernel(self.u, self.v)
        sparse_reg_term = self.lambda_s * tf.reduce_sum(tf.abs(w_hat))
        l2_reg_term = self.lambda_2 * tf.reduce_sum(tf.square(self.W))
        W_eff = self.W * w_hat
        y = tf.matmul(x, W_eff) + self.b
        y = self.activation(y)
        return y, sparse_reg_term + l2_reg_term

    def local_kernel(self, u, v):
        dist = tf.norm(u - v, ord=2, axis=2)
        hat = tf.maximum(0., 1. - dist**2)
        return hat


In [None]:
def global_kernel(input, gk_size, dot_scale):
    avg_pooling = tf.reduce_mean(input, axis=1)
    avg_pooling = tf.reshape(avg_pooling, [1, -1])
    n_kernel = avg_pooling.shape[1]

    conv_kernel = tf.Variable(tf.random.truncated_normal([n_kernel, gk_size**2], stddev=0.1), name='conv_kernel')
    gk = tf.matmul(avg_pooling, conv_kernel) * dot_scale
    gk = tf.reshape(gk, [gk_size, gk_size, 1, 1])
    return gk

In [None]:
def global_conv(input, W):
    input = tf.reshape(input, [1, input.shape[0], input.shape[1], 1])
    conv2d = tf.nn.relu(tf.nn.conv2d(input, W, strides=[1, 1, 1, 1], padding='SAME'))
    return tf.reshape(conv2d, [conv2d.shape[1], conv2d.shape[2]])

# Pre-training

In [None]:
y = R
reg_losses = None

for i in range(n_layers):
    y, reg_loss = KernelLayer(n_hid, n_dim, name=str(i))(y)
    reg_losses = reg_loss if reg_losses is None else reg_losses + reg_loss


pred_p, reg_loss = KernelLayer(n_u, n_dim, activation=tf.identity, name='out')(y)
reg_losses = reg_losses + reg_loss

# L2 loss
diff = train_m * (train_r - pred_p)
sqE = tf.nn.l2_loss(diff)
loss_f = sqE + reg_losses


# Define your optimizer


optimizer_p = tf.optimizers.Adam(learning_rate=0.001)  # Adjust the learning rate as needed

# You can now run the optimization by calling optimizer_p

# Fine-tuning

In [None]:
y = R
reg_losses = None

for i in range(n_layers):
    y, _ = KernelLayer(n_hid, n_dim, name=str(i))(y)

y_dash, _ = KernelLayer(n_u, n_dim, activation=tf.identity, name='out')(y)
gk = global_kernel(y_dash, gk_size, dot_scale)
y_hat = global_conv(train_r, gk)

for i in range(n_layers):
    y_hat, reg_loss = KernelLayer(n_hid,n_dim, name=str(i))(y_hat)
    reg_losses = reg_loss if reg_losses is None else reg_losses + reg_loss

pred_f, reg_loss = KernelLayer(n_u, n_dim, activation=tf.identity, name='out')(y_hat)
reg_losses = reg_losses + reg_loss

# L2 loss
diff = train_m * (train_r - pred_f)
sqE = tf.nn.l2_loss(diff)
loss_f = sqE + reg_losses


# Define your optimizer
optimizer_f =tf.optimizers.Adam(learning_rate=0.001)  # Adjust the learning rate as needed

# You can now run the optimization by calling optimizer_p in a session


# Evaluation Code

In [None]:
def dcg_k(score_label, k):
    dcg, i = 0., 0
    for s in score_label:
        if i < k:
            dcg += (2**s[1]-1) / np.log2(2+i)
            i += 1
    return dcg

In [None]:
def ndcg_k(y_hat, y, k):
    score_label = np.stack([y_hat, y], axis=1).tolist()
    score_label = sorted(score_label, key=lambda d:d[0], reverse=True)
    score_label_ = sorted(score_label, key=lambda d:d[1], reverse=True)
    norm, i = 0., 0
    for s in score_label_:
        if i < k:
            norm += (2**s[1]-1) / np.log2(2+i)
            i += 1
    dcg = dcg_k(score_label, k)
    return dcg / norm

In [None]:
def call_ndcg(y_hat, y):
    ndcg_sum, num = 0, 0
    y_hat, y = y_hat.T, y.T
    n_users = y.shape[0]

    for i in range(n_users):
        y_hat_i = y_hat[i][np.where(y[i])]
        y_i = y[i][np.where(y[i])]

        if y_i.shape[0] < 2:
            continue

        ndcg_sum += ndcg_k(y_hat_i, y_i, y_i.shape[0])  # user-wise calculation
        num += 1

    return ndcg_sum / num

# Training and Testing Loop

In [None]:
from time import time

best_rmse_ep, best_mae_ep, best_ndcg_ep = 0, 0, 0
best_rmse, best_mae, best_ndcg = float("inf"), float("inf"), 0
time_cumulative = 0



# Create an instance of your custom layer
kernel_layer = KernelLayer(n_hid,n_dim)

# Access the trainable variables
trainable_vars = kernel_layer.trainable_variables

#Pre-training
for i in range(epoch_p):
    tic = time()

    with tf.GradientTape() as tape:
         pre= KernelLayer(n_hid,n_dim)(train_r)
         clipped_pred= tf.clip_by_value(pred_p,1.0,5.0)
         squared_error = tf.square(clipped_pred - test_r)
         weighted_error= test_m * squared_error
         total_error=tf.reduce_sum(weighted_error)

         total_weight= tf.reduce_sum(test_m)
         test_error= total_error/total_weight

         test_rmse= tf.sqrt(test_error)

         squared_error_train = tf.square(clipped_pred - train_r)
         weighted_error_train= train_m * squared_error
         total_error_train=tf.reduce_sum(weighted_error_train)

         total_weight_train= tf.reduce_sum(train_m)

         train_error= total_error_train/total_weight_train

         train_rmse= tf.sqrt(train_error)

    grads = tape.gradient(test_error, pre)
    optimizer_p.apply_gradients(zip(grads,trainable_vars))

    t = time() - tic
    time_cumulative += t

    print('.-^-._' * 12)
    print('PRE-TRAINING')
    print('Epoch:', i+1, 'test rmse:', test_rmse, 'train rmse:', train_rmse)
    print('Time:', t, 'seconds')
    print('Time cumulative:', time_cumulative, 'seconds')
    print('.-^-._' * 12)

for i in range(epoch_f):
    tic = time()
    with tf.GradientTape() as tape:
         pre = pred_f
         clipped_pred= tf.clip_by_value(pre,1.0,5.0)
         squared_error_test = tf.square(clipped_pred - test_r)
         weighted_error_test= test_m * squared_error
         total_error_test=tf.reduce_sum(weighted_error)

         total_weight_test= tf.reduce_sum(test_m)
         test_error= total_error_test/total_weight_test

         test_rmse= tf.sqrt(test_error)

         squared_error_train = tf.square(clipped_pred - train_r)
         weighted_error_train= train_m * squared_error
         total_error_train=tf.reduce_sum(weighted_error_train)

         total_weight_train= tf.reduce_sum(train_m)

         train_error= total_error_train/total_weight_train

         train_rmse= tf.sqrt(train_error)


         absolute_error_test = tf.abs(clipped_pred - test_r)
         masked_absolute_error_test = test_m * absolute_error_test
         test_mae = tf.reduce_sum(masked_absolute_error_test) / tf.reduce_sum(test_m)

         absolute_error_train = tf.abs(clipped_pred - train_r)
         masked_absolute_error_train = train_m * absolute_error_train
         train_mae = tf.reduce_sum(masked_absolute_error_train) / tf.reduce_sum(train_m)



         test_ndcg = call_ndcg(np.clip(pre, 1., 5.), test_r)
         train_ndcg = call_ndcg(np.clip(pre, 1., 5.), train_r)

         if test_rmse < best_rmse:
            best_rmse = test_rmse
            best_rmse_ep = i+1

         if test_mae < best_mae:
            best_mae = test_mae
            best_mae_ep = i+1

         if best_ndcg < test_ndcg:
            best_ndcg = test_ndcg
            best_ndcg_ep = i+1

         print('.-^-._' * 12)
         print('FINE-TUNING')
         print('Epoch:', i+1, 'test rmse:', test_rmse, 'test mae:', test_mae, 'test ndcg:', test_ndcg)
         print('Epoch:', i+1, 'train rmse:', train_rmse, 'train mae:', train_mae, 'train ndcg:', train_ndcg)
         print('Time:', t, 'seconds')
         print('Time cumulative:', time_cumulative, 'seconds')
         print('.-^-._' * 12)


.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 1 test rmse: tf.Tensor(2.7749732, shape=(), dtype=float32) train rmse: tf.Tensor(1.0150696, shape=(), dtype=float32)
Time: 0.11676216125488281 seconds
Time cumulative: 0.11676216125488281 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 2 test rmse: tf.Tensor(2.7749732, shape=(), dtype=float32) train rmse: tf.Tensor(1.0150696, shape=(), dtype=float32)
Time: 0.11619710922241211 seconds
Time cumulative: 0.23295927047729492 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 3 test rmse: tf.Tensor(2.7749732, shape=(), dtype=float32) train rmse: tf.Tensor(1.0150696, shape=(), dtype=float32)
Time: 0.11096024513244629 seconds
Time cumulative: 0.3439195156097

In [None]:
# Final result
print('Epoch:', best_rmse_ep, ' best rmse:', best_rmse)
print('Epoch:', best_mae_ep, ' best mae:', best_mae)
print('Epoch:', best_ndcg_ep, ' best ndcg:', best_ndcg)

Epoch: 1  best rmse: tf.Tensor(2.7749732, shape=(), dtype=float32)
Epoch: 1  best mae: tf.Tensor(2.529111, shape=(), dtype=float32)
Epoch: 1  best ndcg: 0.8371781945397095
