In [8]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Simple Recommender Model
The architecture includes embedding layers that's utilized for users and items. It computes the dot product of the user and item embeddings to get the predicted rating. The actication function used is a sigmoid activation function to the dot product result to map it to a probability.

### Complexity:
The model is straightforward with fewer parameters and no additional dense layers. But it is limited to capturing linear interactions between users and items. 

#### Please scroll below to find the enhanced version of the model

In [9]:
# Load the MovieLens dataset
data_url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv(data_url, sep='\t', names=column_names)

# Encode user_id and item_id
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()
df['user_id'] = user_encoder.fit_transform(df['user_id'])
df['item_id'] = item_encoder.fit_transform(df['item_id'])

# Split the data into training and test sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices(({'user_id': train_df['user_id'], 'item_id': train_df['item_id']}, train_df['rating']))
test_dataset = tf.data.Dataset.from_tensor_slices(({'user_id': test_df['user_id'], 'item_id': test_df['item_id']}, test_df['rating']))

# Batch and prefetch the data
train_dataset = train_dataset.shuffle(len(train_df)).batch(256).prefetch(tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(256).prefetch(tf.data.experimental.AUTOTUNE)


In [15]:
from tensorflow.keras import layers

# Define the model
class SimpleRecommenderNet(tf.keras.Model):
    def __init__(self, num_users, num_items, embedding_size=50, **kwargs):
        super(SimpleRecommenderNet, self).__init__(**kwargs)
        self.user_embedding = layers.Embedding(num_users, embedding_size, embeddings_initializer='he_normal', embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        self.item_embedding = layers.Embedding(num_items, embedding_size, embeddings_initializer='he_normal', embeddings_regularizer=tf.keras.regularizers.l2(1e-6))

    def call(self, inputs):
        user_vector = self.user_embedding(inputs['user_id'])
        item_vector = self.item_embedding(inputs['item_id'])
        dot_user_item = tf.reduce_sum(user_vector * item_vector, axis=1)
        return tf.nn.sigmoid(dot_user_item)

num_users = df['user_id'].nunique()
num_items = df['item_id'].nunique()
model = SimpleRecommenderNet(num_users, num_items)

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(train_dataset, epochs=15, validation_data=test_dataset)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Test Loss: -1142.050048828125
Test Accuracy: 0.012658228166401386


# Enhanced Recommender Model

The same architecture was used by using embedding layers for users and time. However, this model also includes concatenation and multiple dense layers using ReLU activation to make better predictions. The output layer uses a final dense layer with sigmoid activation to predict the rating.

### Complexity:
The enhanced version includes additional dense layers, making it more powerful with a higher capacity to learn complex patterns. And it can capture non-linear interactions between users and items.

In [16]:
# Load the MovieLens dataset
data_url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv(data_url, sep='\t', names=column_names)

# Encode user_id and item_id
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()
df['user_id'] = user_encoder.fit_transform(df['user_id'])
df['item_id'] = item_encoder.fit_transform(df['item_id'])

# Normalize ratings to 0 or 1
df['rating'] = df['rating'].apply(lambda x: 1 if x >= 3 else 0)

# Split the data into training and test sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices(({'user_id': train_df['user_id'].values, 'item_id': train_df['item_id'].values}, train_df['rating'].values))
test_dataset = tf.data.Dataset.from_tensor_slices(({'user_id': test_df['user_id'].values, 'item_id': test_df['item_id'].values}, test_df['rating'].values))

# Batch and prefetch the data
train_dataset = train_dataset.shuffle(len(train_df)).batch(256).prefetch(tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(256).prefetch(tf.data.experimental.AUTOTUNE)


In [17]:
from tensorflow.keras import layers

# Define the model
class EnhancedRecommenderNet(tf.keras.Model):
    def __init__(self, num_users, num_items, embedding_size=50, **kwargs):
        super(EnhancedRecommenderNet, self).__init__(**kwargs)
        self.user_embedding = layers.Embedding(num_users, embedding_size, embeddings_initializer='he_normal', embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        self.item_embedding = layers.Embedding(num_items, embedding_size, embeddings_initializer='he_normal', embeddings_regularizer=tf.keras.regularizers.l2(1e-6))
        self.dense_1 = layers.Dense(128, activation='relu')
        self.dense_2 = layers.Dense(64, activation='relu')
        self.output_layer = layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        user_vector = self.user_embedding(inputs['user_id'])
        item_vector = self.item_embedding(inputs['item_id'])
        concat = tf.concat([user_vector, item_vector], axis=-1)
        x = self.dense_1(concat)
        x = self.dense_2(x)
        return self.output_layer(x)

num_users = df['user_id'].nunique()
num_items = df['item_id'].nunique()
model = EnhancedRecommenderNet(num_users, num_items)

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(train_dataset, epochs=10, validation_data=test_dataset)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.49344485998153687
Test Accuracy: 0.8314499855041504
