# A TensorFlow model for the Dictionary Filter

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

2024-07-29 15:23:04.550310: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-29 15:23:04.576061: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import sys
sys.path.append('../Library')

from Metrics.NMSE import NMSE
from Modules.BatchGaussianDF import BatchGaussianDF as DictionaryFilter

## Test on the MovieLens 100k dataset

In [3]:
# Load the ratings table: one row per (movie index, user index, rating) triple
ratings_csv_path = '../Data/MovieLens/Small/dataset.csv'
dataset = pd.read_csv(ratings_csv_path)

# Preview the first rows (the bare expression is rendered as the cell output)
dataset.head()

Unnamed: 0,Movie Index,User Index,Rating
0,356,137,4.0
1,708,91,2.0
2,411,300,4.0
3,55,59,4.0
4,894,196,3.0


In [4]:
from AdvancedModelSelection import user_based_train_val_test_split

# Partition the ratings into train / validation / test frames.
# The fixed random_state keeps the partition reproducible across runs.
train_data, val_data, test_data = user_based_train_val_test_split(
    dataset,
    test_size=0.2,
    val_size=0.1,
    random_state=42,
)

# Report the size of each partition
for caption, split_frame in (
    ("Train data shape:", train_data),
    ("Validation data shape:", val_data),
    ("Test data shape:", test_data),
):
    print(caption, split_frame.shape)

Train data shape: (72837, 3)
Validation data shape: (7530, 3)
Test data shape: (19633, 3)


In [5]:
# Configuration shared by the cells below.
# NOTE: this cell reads the ratings DataFrame through `dataset`, so it must run
# before the later cell that rebinds `dataset` to a tf.data.Dataset.

# Matrix dimensions, taken as (largest index + 1); this assumes the index
# columns are zero-based. Both values are used as the dense_shape of the
# sparse rating matrices.
NUM_MOVIES = dataset['Movie Index'].max() + 1
NUM_USERS = dataset['User Index'].max() + 1
# Second constructor argument of DictionaryFilter and divisor of INIT_SCALE;
# presumably the number of latent factors / dictionary atoms - confirm in BatchGaussianDF.
NUM_FACTORS = 2
# Nominal number of user columns per batch: the user axis is cut into
# NUM_USERS // BATCH_SIZE slices by the data generator.
BATCH_SIZE = 32
# Passed to DictionaryFilter as its 4th and 5th constructor arguments.
# Their meaning is not visible from this notebook - TODO document (see BatchGaussianDF).
SIGMA = 0.2
GAMMA = 0.01
# Mean observed rating divided by the number of factors; passed to
# DictionaryFilter as its 3rd constructor argument (presumably an
# initialisation scale - confirm in BatchGaussianDF).
INIT_SCALE = dataset['Rating'].mean() / NUM_FACTORS

In [6]:
def ratings_to_sparse(ratings_frame):
    """Turn a ratings frame into a sparse (NUM_MOVIES x NUM_USERS) matrix.

    Args:
        ratings_frame: DataFrame with 'Movie Index', 'User Index' and 'Rating'
            columns; each row becomes one stored entry at
            [movie index, user index].

    Returns:
        A tf.sparse.SparseTensor with dense shape [NUM_MOVIES, NUM_USERS],
        passed through tf.sparse.reorder so that its entries are in the
        canonical row-major order expected by other sparse ops.
    """
    sparse_ratings = tf.sparse.SparseTensor(
        indices=ratings_frame[['Movie Index', 'User Index']].values,
        values=ratings_frame['Rating'].values,
        dense_shape=[NUM_MOVIES, NUM_USERS],
    )
    return tf.sparse.reorder(sparse_ratings)


# One sparse rating matrix per split, all built by the same helper so the
# three conversions cannot drift apart.
train_sparse_tensor = ratings_to_sparse(train_data)
val_sparse_tensor = ratings_to_sparse(val_data)
test_sparse_tensor = ratings_to_sparse(test_data)

# Create dataset
def data_generator():
    """Yield aligned (train, val, test) dense rating blocks, one per user slice.

    The user axis (axis=1) of each sparse rating matrix is cut into
    NUM_USERS // BATCH_SIZE slices, and each slice is densified on the fly.
    Because the number of slices is fixed by integer division, the slices are
    not exactly BATCH_SIZE columns wide when NUM_USERS is not a multiple of
    BATCH_SIZE (see tf.sparse.split for how the remainder is distributed).

    Yields:
        A 3-tuple of dense tensors (train, val, test) covering the same user
        columns; entries that are absent from the sparse input are 0.
    """
    num_slices = NUM_USERS // BATCH_SIZE
    train_parts, val_parts, test_parts = [
        tf.sparse.split(sp_input=sparse_ratings, num_split=num_slices, axis=1)
        for sparse_ratings in (train_sparse_tensor, val_sparse_tensor, test_sparse_tensor)
    ]
    for sparse_triple in zip(train_parts, val_parts, test_parts):
        yield tuple(tf.sparse.to_dense(part) for part in sparse_triple)

# Each element is a (train, val, test) triple of dense float32 matrices with
# NUM_MOVIES rows; the column count is left open because it depends on the
# user slice produced by data_generator.
# NOTE(review): this rebinds `dataset`, which up to this point named the
# ratings DataFrame. Cells that read the DataFrame through `dataset` cannot be
# re-run after this one without reloading the CSV; consider a distinct name
# (and updating the training loop accordingly).
dense_batch_spec = tf.TensorSpec(shape=[NUM_MOVIES, None], dtype=tf.float32)
dataset = tf.data.Dataset.from_generator(
    data_generator,
    output_signature=(dense_batch_spec,) * 3,
)

2024-07-29 15:23:05.437690: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-29 15:23:05.457090: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-29 15:23:05.457128: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-29 15:23:05.459218: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-07-29 15:23:05.459247: I external/local_xla/xla/stream_executor

In [7]:
# Create the model and one NMSE accumulator per data split
model = DictionaryFilter(NUM_MOVIES, NUM_FACTORS, INIT_SCALE, SIGMA, GAMMA)
nmse_train = NMSE()
nmse_val = NMSE()
nmse_test = NMSE()

# Training schedule
NUM_EPOCHS = 10
REFINE_AFTER_EPOCH = 5   # extra refinement passes run only for epochs > this value
NUM_REFINE_STEPS = 5     # number of extra model calls per batch once refinement is on


def observed_mask(ratings):
    """Return a float32 mask that is 1.0 where `ratings` is non-zero and 0.0 elsewhere.

    Missing ratings are stored as 0 in the dense batches, so a non-zero entry
    marks an observed rating.
    """
    return tf.cast(tf.not_equal(ratings, 0.0), tf.float32)


# Train the model
for epoch in range(NUM_EPOCHS):
    print("Epoch:", epoch)
    for train_batch, val_batch, test_batch in dataset:
        # Only the training block is shown to the model; the validation and
        # test blocks are used for scoring alone.
        x = model(train_batch)

        # Perform some gradient descent approximations of X by feeding the
        # current estimate back into the model.
        # NOTE(review): in the recorded run the NMSE jumps from ~0.07 to >3000
        # at epoch 6, i.e. exactly when this branch first executes. Presumably
        # each extra call also updates the model state - confirm against
        # BatchGaussianDF before relying on the later epochs.
        if epoch > REFINE_AFTER_EPOCH:
            for _ in range(NUM_REFINE_STEPS):
                x = model(train_batch, x)

        # The reconstruction is identical for all three metrics, so compute it once.
        prediction = tf.matmul(model.C, x)
        nmse_train.update_state(train_batch, prediction, observed_mask(train_batch))
        nmse_val.update_state(val_batch, prediction, observed_mask(val_batch))
        nmse_test.update_state(test_batch, prediction, observed_mask(test_batch))

    print("Train NMSE:", nmse_train.result().numpy())
    print("Val NMSE:", nmse_val.result().numpy())
    print("Test NMSE:", nmse_test.result().numpy())

    # Start every epoch with fresh accumulators so the reported values are per-epoch.
    nmse_train.reset_states()
    nmse_val.reset_states()
    nmse_test.reset_states()

Epoch: 0


2024-07-29 15:23:08.665537: I tensorflow/core/util/cuda_solvers.cc:178] Creating GpuSolver handles for stream 0x55b2e06547f0
2024-07-29 15:23:10.473390: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06990382
Val NMSE: 0.074992076
Test NMSE: 0.07356262
Epoch: 1


2024-07-29 15:23:11.590718: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06841522
Val NMSE: 0.07389672
Test NMSE: 0.07248134
Epoch: 2


2024-07-29 15:23:12.752391: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06793185
Val NMSE: 0.07348814
Test NMSE: 0.072063595
Epoch: 3


2024-07-29 15:23:13.931479: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.067635104
Val NMSE: 0.073225066
Test NMSE: 0.07182055
Epoch: 4


2024-07-29 15:23:15.155993: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06742076
Val NMSE: 0.07304524
Test NMSE: 0.07163408
Epoch: 5


2024-07-29 15:23:16.393035: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 0.06725495
Val NMSE: 0.07291221
Test NMSE: 0.071482845
Epoch: 6


2024-07-29 15:23:18.420202: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 3480.1902
Val NMSE: 42305.426
Test NMSE: 37047.88
Epoch: 7


2024-07-29 15:23:20.281454: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 377.24945
Val NMSE: 358.72025
Test NMSE: 437.49036
Epoch: 8


2024-07-29 15:23:22.083676: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Train NMSE: 174.00287
Val NMSE: 167.65701
Test NMSE: 204.72874
Epoch: 9
Train NMSE: 64.987274
Val NMSE: 76.63723
Test NMSE: 77.38152


2024-07-29 15:23:23.890846: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
