# A TensorFlow model for the Dictionary Filter

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

2024-07-29 15:23:45.352199: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-29 15:23:45.385547: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
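# NOTE: inline draft kept for reference only. The notebook itself uses the class
# imported as `DictionaryFilter` from `Modules.GaussianDF` in a later cell, which
# may differ from this draft.
# class DictionaryFilter(tf.Module):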
#     """
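#     Input:
#         Y: tf.Tensor of shape (m, d), where m is the number of samples (rows, matching
#            the rows of C) and d is the batch size (columns); entries equal to 0 are
#            treated as unobserved
#     Variables:
#         C: tf.Tensor of shape (m, r)
#         V: tf.Tensor of shape (r, r)
#     Returns:
#         X: tf.Tensor of shape (r, d)
#     """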

#     def __init__(self, m, r, init_scale, train_lambda, name=None):
#         super().__init__(name=name)
#         self.C = tf.Variable(tf.multiply(tf.random.truncated_normal([m, r], mean=1.0, stddev=0.02), init_scale))
#         self.V = tf.Variable(tf.eye(r, dtype=tf.float32))
#         self.train_lambda = tf.constant(train_lambda)

#     @tf.function
#     def __call__(self, Y):
#         # Define the mask
#         M = tf.cast(tf.not_equal(Y, 0.0), tf.float32) # shape: (m, d)

#         # Compute X
#         def compute_xk(inputs):
#             m = tf.reshape(inputs[0], [-1, 1])
#             y = tf.reshape(inputs[1], [-1, 1])
#             mC = tf.multiply(m, self.C)
#             x = tf.matmul(tf.linalg.pinv(tf.matmul(mC, mC, transpose_a=True)), tf.matmul(mC, y, transpose_a=True))
#             return tf.reshape(x, [-1])

#         X = tf.map_fn(
#             fn=compute_xk, elems=(tf.transpose(M), tf.transpose(Y)), 
#             fn_output_signature=tf.float32
#         )
#         X = tf.transpose(X) # shape: (r, d)

#         # Update C and V
#         denominator = tf.multiply(
#             tf.add(
#                 tf.reduce_sum(tf.multiply(X, tf.matmul(self.V, X)), axis=0, keepdims=True),
#                 self.train_lambda
#             ),
#             tf.cast(tf.reshape(tf.shape(Y)[1], [1, 1]), tf.float32)
#         ) # shape: (1, d)

#         reduced_X = tf.divide(X, denominator) # shape: (r, d)

#         temp = tf.matmul(reduced_X, self.V, transpose_a=True) # shape: (d, r)

#         self.C.assign(tf.add(
#             self.C,
#             tf.matmul(
#                 tf.multiply(M, tf.subtract(Y, tf.matmul(self.C, X))),
#                 temp
#             )
#         )) # shape: (m, r)

#         self.V.assign(tf.subtract(
#             self.V,
#             tf.matmul(
#                 tf.matmul(self.V, X),
#                 temp
#             )
#         )) # shape: (r, r)

#         return X

In [3]:
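# NOTE: inline draft kept for reference only. The notebook itself uses the `NMSE`
# class imported from `Metrics.NMSE` in a later cell, which may differ from this draft.
# class NMSE(tf.keras.metrics.Metric):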
#     def __init__(self, name='nmse', **kwargs):
#         super(NMSE, self).__init__(name=name, **kwargs)
#         self.total_mse = self.add_weight(name='total_mse', initializer='zeros')
#         self.total_samples = self.add_weight(name='total_samples', initializer='zeros')

#     def update_state(self, y_true, y_pred, sample_weight=None):
#         # If there is a mask, apply it
#         if sample_weight is not None:
#             sample_weight = tf.cast(sample_weight, self.dtype)
#             y_true *= sample_weight
#             y_pred *= sample_weight

#         # Update states
#         mse = tf.reduce_sum(tf.square(y_true - y_pred))
#         samples = tf.reduce_sum(tf.square(y_true))
#         self.total_mse.assign_add(mse)
#         self.total_samples.assign_add(samples)

#     def result(self):
#         return self.total_mse / self.total_samples

#     def reset_states(self):
#         self.total_mse.assign(0.0)
#         self.total_samples.assign(0.0)

In [4]:
import sys
sys.path.append('../Library')

from Metrics.NMSE import NMSE
from Modules.GaussianDF import GaussianDF as DictionaryFilter

## Test on the MovieLens 100k dataset

In [5]:
# Load the ratings table from its CSV file
RATINGS_CSV = '../Data/MovieLens/Small/dataset.csv'
dataset = pd.read_csv(RATINGS_CSV)

# Preview the first few rows (last expression of the cell is displayed)
dataset.head()

Unnamed: 0,Movie Index,User Index,Rating
0,356,137,4.0
1,708,91,2.0
2,411,300,4.0
3,55,59,4.0
4,894,196,3.0


In [6]:
from AdvancedModelSelection import user_based_train_val_test_split

# Partition the ratings into train / validation / test tables.
# random_state is fixed, presumably so the split is reproducible — confirm
# against the helper's implementation.
train_data, val_data, test_data = user_based_train_val_test_split(
    dataset,
    test_size=0.2,
    val_size=0.1,
    random_state=42,
)

# Report the size of every partition
for shape_label, partition in (
    ("Train data shape:", train_data),
    ("Validation data shape:", val_data),
    ("Test data shape:", test_data),
):
    print(shape_label, partition.shape)

Train data shape: (72837, 3)
Validation data shape: (7530, 3)
Test data shape: (19633, 3)


In [7]:
# Hyper-parameters
NUM_FACTORS = 2
BATCH_SIZE = 32
TRAIN_LAMBDA = 2.0

# Rating-matrix dimensions: one more than the largest index present
# (assumes the index columns are zero-based — TODO confirm).
movie_indices = dataset['Movie Index']
user_indices = dataset['User Index']
NUM_MOVIES = movie_indices.max() + 1
NUM_USERS = user_indices.max() + 1

# Initial scale: the mean rating divided by the number of factors
mean_rating = dataset['Rating'].mean()
INIT_SCALE = mean_rating / NUM_FACTORS

In [8]:
def ratings_to_sparse(ratings):
    """Convert a ratings table into a (NUM_MOVIES, NUM_USERS) sparse tensor.

    Args:
        ratings: DataFrame with 'Movie Index', 'User Index' and 'Rating' columns.

    Returns:
        A tf.sparse.SparseTensor with int64 indices and float32 values, passed
        through tf.sparse.reorder so its indices are in canonical order.
    """
    sparse = tf.sparse.SparseTensor(
        # SparseTensor indices must be int64; make that explicit.
        indices=ratings[['Movie Index', 'User Index']].values.astype(np.int64),
        # Match the float32 dtype declared in the dataset's output_signature
        # instead of relying on an implicit float64 -> float32 conversion.
        values=ratings['Rating'].values.astype(np.float32),
        dense_shape=[NUM_MOVIES, NUM_USERS],
    )
    return tf.sparse.reorder(sparse)


# One sparse (movies x users) rating matrix per data split
train_sparse_tensor = ratings_to_sparse(train_data)
val_sparse_tensor = ratings_to_sparse(val_data)
test_sparse_tensor = ratings_to_sparse(test_data)

# Number of user batches. Guard against 0 when there are fewer users than
# BATCH_SIZE, which would make tf.sparse.split fail.
NUM_BATCHES = max(1, int(NUM_USERS) // BATCH_SIZE)


def split_by_users(sparse_ratings):
    """Split a sparse rating matrix into NUM_BATCHES blocks along the user axis.

    When NUM_USERS is not a multiple of NUM_BATCHES, tf.sparse.split gives the
    leading blocks one extra column, so BATCH_SIZE is a target width rather
    than an exact one; no user is dropped.
    """
    return tf.sparse.split(sp_input=sparse_ratings, num_split=NUM_BATCHES, axis=1)


# Split once up front: the slices do not change between epochs, so there is
# no need to recompute them every time the generator is restarted.
train_slices = split_by_users(train_sparse_tensor)
val_slices = split_by_users(val_sparse_tensor)
test_slices = split_by_users(test_sparse_tensor)


# Create dataset
def data_generator():
    """Yield one (train, val, test) triple of dense rating blocks per user batch.

    Each element has shape (NUM_MOVIES, users_in_batch); positions without a
    rating in the corresponding split are filled with 0 by tf.sparse.to_dense.
    """
    for train_slice, val_slice, test_slice in zip(train_slices, val_slices, test_slices):
        yield (
            tf.sparse.to_dense(train_slice),
            tf.sparse.to_dense(val_slice),
            tf.sparse.to_dense(test_slice),
        )


# NOTE(review): this rebinds `dataset`, which until now held the ratings
# DataFrame. Cells above that read the DataFrame cannot be re-run after this
# cell without reloading the CSV first; consider a distinct name for both.
dataset = tf.data.Dataset.from_generator(
    data_generator,
    output_signature=(
        tf.TensorSpec(shape=[NUM_MOVIES, None], dtype=tf.float32),
        tf.TensorSpec(shape=[NUM_MOVIES, None], dtype=tf.float32),
        tf.TensorSpec(shape=[NUM_MOVIES, None], dtype=tf.float32)
    )
)

2024-07-29 15:23:46.253565: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-29 15:23:46.303829: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-29 15:23:46.303869: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-29 15:23:46.305555: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-29 15:23:46.305597: I external/local_xla/xla/stream_executor

In [9]:
# Number of passes over the batched dataset
NUM_EPOCHS = 10

# Create the model and one NMSE accumulator per data split
model = DictionaryFilter(NUM_MOVIES, NUM_FACTORS, INIT_SCALE, TRAIN_LAMBDA)
nmse_train = NMSE()
nmse_val = NMSE()
nmse_test = NMSE()

# (label printed at the end of each epoch, metric), in reporting order
split_metrics = (
    ("Train NMSE:", nmse_train),
    ("Val NMSE:", nmse_val),
    ("Test NMSE:", nmse_test),
)

# Train the model
for epoch in range(NUM_EPOCHS):
    print("Epoch:", epoch)
    for train_batch, val_batch, test_batch in dataset:
        # Only the training block is fed to the model.
        # NOTE(review): the call presumably updates model.C in place, as the
        # commented-out draft earlier in this notebook does — confirm against
        # Modules.GaussianDF. If so, each epoch's NMSE is accumulated while
        # the model is still changing from batch to batch.
        x = model(train_batch)

        # Reconstruct the batch once and reuse it for all three splits
        # (model.C and x are not modified between the metric updates).
        reconstruction = tf.matmul(model.C, x)

        for (_, metric), batch in zip(split_metrics, (train_batch, val_batch, test_batch)):
            # Non-zero entries are the observed ratings of this split; a
            # rating of exactly 0 would be treated as missing.
            observed = tf.cast(tf.not_equal(batch, 0.0), tf.float32)
            metric.update_state(batch, reconstruction, observed)

    # Report the epoch's metrics, then clear them for the next epoch
    for label, metric in split_metrics:
        print(label, metric.result().numpy())
        metric.reset_states()

Epoch: 0


2024-07-29 15:23:47.166265: I tensorflow/core/util/cuda_solvers.cc:178] Creating GpuSolver handles for stream 0x56349d4b82e0
2024-07-29 15:23:48.701025: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06839376
Val NMSE: 0.073936656
Test NMSE: 0.07221928
Epoch: 1


2024-07-29 15:23:49.662714: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.065912
Val NMSE: 0.07183566
Test NMSE: 0.07004571
Epoch: 2


2024-07-29 15:23:50.606647: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06540892
Val NMSE: 0.071362466
Test NMSE: 0.069560945
Epoch: 3


2024-07-29 15:23:51.565025: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06511702
Val NMSE: 0.07109586
Test NMSE: 0.06932655
Epoch: 4


2024-07-29 15:23:52.519672: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06491697
Val NMSE: 0.070903085
Test NMSE: 0.06913952
Epoch: 5


2024-07-29 15:23:53.505407: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06476351
Val NMSE: 0.0707439
Test NMSE: 0.06900969
Epoch: 6


2024-07-29 15:23:54.570482: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06464086
Val NMSE: 0.07066642
Test NMSE: 0.06891571
Epoch: 7


2024-07-29 15:23:55.617282: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06454013
Val NMSE: 0.07055425
Test NMSE: 0.068820834
Epoch: 8


2024-07-29 15:23:56.655239: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06445202
Val NMSE: 0.07045923
Test NMSE: 0.068746455
Epoch: 9
Train NMSE: 0.06437284
Val NMSE: 0.07037607
Test NMSE: 0.06867517


2024-07-29 15:23:57.703654: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
