In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
import numpy as np
import pandas as pd

2024-01-13 22:13:20.959583: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Select GPU number 1
        tf.config.experimental.set_visible_devices(gpus[1], 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

2 Physical GPUs, 1 Logical GPUs


2024-01-13 22:13:22.044251: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-01-13 22:13:22.044374: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-01-13 22:13:22.048550: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-01-13 22:13:22.048660: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-01-13 22:13:22.048751: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from S

In [3]:
BATCH_SIZE = 64

In [4]:
class RecommenderModel(tf.keras.Model):
    def __init__(self, num_users, num_items, embedding_size=50, hidden_units=128, learning_rate=1e-4):
        super(RecommenderModel, self).__init__()
        self.optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

        self.user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size, input_length=1)
        self.item_embedding = Embedding(input_dim=num_items, output_dim=embedding_size, input_length=1)
        self.flatten = Flatten()
        self.concat = Concatenate()
        self.hidden_layer = Dense(hidden_units, activation='relu')
        self.output_layer = Dense(1, activation='sigmoid')

    def call(self, inputs):
        user_input, item_input = inputs
        user_embedded = self.user_embedding(user_input)
        item_embedded = self.item_embedding(item_input)
        user_flat = self.flatten(user_embedded)
        item_flat = self.flatten(item_embedded)
        merged = self.concat([user_flat, item_flat])
        hidden = self.hidden_layer(merged)
        output = self.output_layer(hidden)
        return output
    
    @tf.function
    def compute_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        '''
        Compute the MSE loss of the model
        '''
        loss = tf.losses.mean_squared_error(y_true, y_pred)

        return loss

    @tf.function  # the function decorated by tf.function will be compiled into a callable TensorFlow graph automatically. This allows the TensorFlow runtime to apply optimizations and exploit parallelism to boost computation performance.
    def train_step(self, data):
        with tf.GradientTape() as tape:
            # training=True is only needed if there are layers with different
            # behavior during training versus inference (e.g. Dropout).

            user_ids = tf.cast(data[:, 0], dtype=tf.int32)
            item_ids = tf.cast(data[:, 1], dtype=tf.int32)
            y_true = tf.cast(data[:, 2], dtype=tf.float32)

            y_pred = self([user_ids, item_ids])
            loss = self.compute_loss(y_true, y_pred)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        return loss
    

    @tf.function
    def val_step(self, data):
        user_ids = tf.cast(data[:, 0], dtype=tf.int32)
        item_ids = tf.cast(data[:, 1], dtype=tf.int32)
        y_true = tf.cast(data[:, 2], dtype=tf.float32)

        y_pred = self([user_ids, item_ids])
        loss = self.compute_loss(y_true, y_pred)

        return loss

In [5]:
num_users = 2000
num_items = 200000
num_samples = 1000
num_validation_samples = int(0.2 * num_samples)
user_ids = np.random.randint(0, num_users, size=num_samples)
item_ids = np.random.randint(0, num_items, size=num_samples)
ratings = np.random.randint(1, 6, size=num_samples)

data = {'user_id': user_ids, 'item_id': item_ids, 'rating': ratings}
df_ratings = pd.DataFrame(data)
df_ratings

Unnamed: 0,user_id,item_id,rating
0,1996,36978,4
1,836,6530,1
2,325,91669,1
3,1214,125732,1
4,1168,144534,4
...,...,...,...
995,800,78793,1
996,296,141731,3
997,1817,25810,4
998,1646,35006,5


In [6]:
df_train = df_ratings.iloc[:800]
df_train_norm = df_train.copy(deep=True)
df_train_norm['rating'] -= 3
df_train_norm['rating'] /= 2
df_train_norm

Unnamed: 0,user_id,item_id,rating
0,1996,36978,0.5
1,836,6530,-1.0
2,325,91669,-1.0
3,1214,125732,-1.0
4,1168,144534,0.5
...,...,...,...
795,540,164884,0.0
796,1168,40589,0.0
797,333,101981,-0.5
798,686,112988,0.5


In [7]:
df_val = df_ratings.iloc[800:]
df_val_norm = df_val.copy(deep=True)
df_val_norm['rating'] -= 3
df_val_norm['rating'] /= 2
df_val_norm

Unnamed: 0,user_id,item_id,rating
800,1320,66937,0.5
801,856,82820,-1.0
802,1048,83107,1.0
803,1221,187504,-0.5
804,1253,43407,-1.0
...,...,...,...
995,800,78793,-1.0
996,296,141731,0.0
997,1817,25810,0.5
998,1646,35006,1.0


In [8]:
dataset_train = tf.data.Dataset.from_tensor_slices(df_train_norm)
dataset_train = dataset_train.batch(batch_size=BATCH_SIZE, num_parallel_calls=tf.data.AUTOTUNE, drop_remainder=True)\
                             .prefetch(buffer_size=tf.data.AUTOTUNE)

dataset_val = tf.data.Dataset.from_tensor_slices(df_val_norm)
dataset_val = dataset_val.batch(batch_size=BATCH_SIZE, num_parallel_calls=tf.data.AUTOTUNE, drop_remainder=True)\
                         .prefetch(buffer_size=tf.data.AUTOTUNE)

In [9]:
num_users = 2000
num_items = 200000
model = RecommenderModel(num_users, num_items)
model.build(input_shape=[(None, 1), (None, 1)])
print(model.summary())

Model: "recommender_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  100000    
                                                                 
 embedding_1 (Embedding)     multiple                  10000000  
                                                                 
 flatten (Flatten)           multiple                  0         
                                                                 
 concatenate (Concatenate)   multiple                  0         
                                                                 
 dense (Dense)               multiple                  12928     
                                                                 
 dense_1 (Dense)             multiple                  129       
                                                                 
Total params: 10,113,057
Trainable params: 10,113

In [10]:
# train the model
train_losses = []
val_losses = []

for epoch in range(1, 1000 + 1):
    train_loss = []
    val_loss = []

    # training
    for data in dataset_train:
        loss = model.train_step(data)
        train_loss.append(loss.numpy())

    # validating
    for data in dataset_val:
        loss = model.val_step(data)
        val_loss.append(loss.numpy())

    # record losses
    avg_train_loss = np.mean(train_loss)
    avg_val_loss = np.mean(val_loss)
    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)

    # print losses
    print(f'Epoch {epoch} train_loss: {avg_train_loss:.4f}, val_loss: {avg_val_loss:.4f}')

2024-01-13 22:13:24.000121: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 1 train_loss: 0.7321, val_loss: 0.7113
Epoch 2 train_loss: 0.7283, val_loss: 0.7085
Epoch 3 train_loss: 0.7245, val_loss: 0.7057
Epoch 4 train_loss: 0.7207, val_loss: 0.7029
Epoch 5 train_loss: 0.7169, val_loss: 0.7000
Epoch 6 train_loss: 0.7131, val_loss: 0.6972
Epoch 7 train_loss: 0.7091, val_loss: 0.6943
Epoch 8 train_loss: 0.7051, val_loss: 0.6913
Epoch 9 train_loss: 0.7009, val_loss: 0.6883
Epoch 10 train_loss: 0.6967, val_loss: 0.6853
Epoch 11 train_loss: 0.6922, val_loss: 0.6822
Epoch 12 train_loss: 0.6876, val_loss: 0.6790
Epoch 13 train_loss: 0.6828, val_loss: 0.6757
Epoch 14 train_loss: 0.6778, val_loss: 0.6724
Epoch 15 train_loss: 0.6725, val_loss: 0.6690
Epoch 16 train_loss: 0.6669, val_loss: 0.6655
Epoch 17 train_loss: 0.6610, val_loss: 0.6620
Epoch 18 train_loss: 0.6549, val_loss: 0.6583
Epoch 19 train_loss: 0.6483, val_loss: 0.6546
Epoch 20 train_loss: 0.6415, val_loss: 0.6509
Epoch 21 train_loss: 0.6344, val_loss: 0.6471
Epoch 22 train_loss: 0.6269, val_loss: 0.64