Code from https://medium.com/@jdwittenauer/deep-learning-with-keras-recommender-systems-e7b99cb29929

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [11]:
import tensorflow_recommenders as tfrs

In [21]:
ratings = pd.read_pickle("tiny_selection_longform.pkl")
user_stats = pd.read_pickle("./Data/moderate_user_stats.pkl")

In [22]:
ratings.head()

Unnamed: 0,movie_id,rating_score,user_id
0,287,4,70807566
1,3394,4,4650066
2,115,4,36549717
3,2430,4,88582005
4,3394,4,33074282


In [25]:
ratings.rename(columns={'rating_score':'user_rating'}, inplace=True)

In [26]:
ratings.head()

Unnamed: 0,movie_id,user_rating,user_id
0,287,4,70807566
1,3394,4,4650066
2,115,4,36549717
3,2430,4,88582005
4,3394,4,33074282


In [23]:
tf.random.set_seed(42)

In [27]:
dataset = tf.data.Dataset.from_tensor_slices(ratings.values)

In [28]:
shuffled = dataset.shuffle(len(ratings), seed=42, reshuffle_each_iteration=False)

In [30]:
train = shuffled.take(int(len(ratings)*.8))

In [31]:
test = shuffled.skip(int(len(ratings)*.8)).take(int(len(ratings)*.2))

In [4]:
user_enc = LabelEncoder()
ratings['user'] = user_enc.fit_transform(ratings['user_id'].values)
n_users = ratings['user'].nunique()
n_users

2390

In [5]:
item_enc = LabelEncoder()
ratings['movie'] = item_enc.fit_transform(ratings['movie_id'].values)
n_movies = ratings['movie'].nunique()
n_movies

1648

In [6]:
ratings['rating_score'].unique()

array([4, 2, 3, 5, 1], dtype=int8)

In [7]:
min_rating = min(ratings['rating_score'])
max_rating = max(ratings['rating_score'])

In [8]:
ratings.head()

Unnamed: 0,movie_id,rating_score,user_id,user,movie
0,287,4,70807566,1671,122
1,3394,4,4650066,98,595
2,115,4,36549717,893,18
3,2430,4,88582005,2115,491
4,3394,4,33074282,811,595


In [67]:
class RankingModel(keras.Model):
    def __init__(self):
        super().__init__()
        embedding_dimension = 32
        
        self.user_embeddings = keras.Sequential([
            keras.layers.experimental.preprocessing.IntegerLookup(
                vocabulary = ratings['user_id'].unique(), mask_token=None),
            keras.layers.Embedding(ratings['user_id'].nunique()+1, embedding_dimension)
        ])
        
        
        self.movie_embeddings = keras.Sequential([
            keras.layers.experimental.preprocessing.IntegerLookup(
                vocabulary = ratings['movie_id'].unique(), mask_token=None),
            keras.layers.Embedding(ratings['movie_id'].nunique()+1, embedding_dimension)
        ])
        
        
        self.ratings = keras.Sequential([
            keras.layers.Dense(256, activation='relu'),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(1)
        ])
        
        
    def call(self, inputs):
        user_id, movie_id = inputs

        user_embedding = self.user_embeddings(user_id)
        movie_embedding = self.movie_embeddings(movie_id)

        return self.ratings(tf.concat([user_embedding, movie_embedding], axis=1))

In [12]:
task = tfrs.tasks.Ranking(
    loss = keras.losses.MeanSquaredError(),
    metrics = [keras.metrics.RootMeanSquaredError()])

In [14]:
from typing import Dict, Text

In [61]:
class MovieModel(tfrs.models.Model):
    def __init__(self):
        super().__init__()
        self.ranking_model: keras.Model = RankingModel()
        self.task: keras.layers.Layer = tfrs.tasks.Ranking(
            loss = keras.losses.MeanSquaredError(),
            metrics = [keras.metrics.RootMeanSquaredError()]
        )
            
    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        rating_predictions = self.ranking_model(         
            (features[2], features[0])) #user_id, movie_id
        
        return self.task(labels=features[1], predictions=rating_predictions)

In [73]:
model = MovieModel()
model.compile(optimizer = keras.optimizers.Adagrad(learning_rate=.01))

In [74]:
test = ratings.sample(frac=.8, random_state=42)
train = ratings[ratings.index.isin(test.index)==False]

In [77]:
model.fit(test, epochs=3, verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f9eac0718b0>

In [78]:
model.evaluate(train)



[34095264.0, 696877972455424.0, 0, 696877972455424.0]

In [51]:
inputs.shape

TensorShape([None, 1648])

In [52]:
inputs.dtype

tf.float32

In [53]:
dense = layers.Dense(500, activation="relu")
x = dense(inputs)


In [54]:
x = layers.Dense(100, activation='softmax')(x)
outputs = layers.Dense(10)(x)

In [55]:
model = keras.Model(inputs=inputs, outputs=outputs, name="test1")

In [56]:
model.summary()

Model: "test1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 1648)]            0         
_________________________________________________________________
dense_5 (Dense)              (None, 500)               824500    
_________________________________________________________________
dense_6 (Dense)              (None, 100)               50100     
_________________________________________________________________
dense_7 (Dense)              (None, 10)                1010      
Total params: 875,610
Trainable params: 875,610
Non-trainable params: 0
_________________________________________________________________


In [57]:
X = ratings[['user', 'movie']].values
y = ratings['rating_score'].values

X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=.2, random_state=0)

In [62]:
X_train.shape

(2000, 2)

In [58]:
model.compile(loss = keras.losses.MeanSquaredError(),
             optimizer = keras.optimizers.Adam(learning_rate =.001),
             metrics = keras.metrics.RootMeanSquaredError())

In [61]:
history = model.fit(X_train, y_train, epochs=5, validation_split=.2)
# test_scores = model.evaluate(X_test, y_test, verbose=2)

Epoch 1/5


ValueError: in user code:

    /home/sahar/anaconda3/envs/tf_env/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:855 train_function  *
        return step_function(self, iterator)
    /home/sahar/anaconda3/envs/tf_env/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:845 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/sahar/anaconda3/envs/tf_env/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1285 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/sahar/anaconda3/envs/tf_env/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2833 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/sahar/anaconda3/envs/tf_env/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3608 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/sahar/anaconda3/envs/tf_env/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:838 run_step  **
        outputs = model.train_step(data)
    /home/sahar/anaconda3/envs/tf_env/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 train_step
        y_pred = self(x, training=True)
    /home/sahar/anaconda3/envs/tf_env/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1013 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /home/sahar/anaconda3/envs/tf_env/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:267 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) +

    ValueError: Input 0 is incompatible with layer test1: expected shape=(None, 1648), found shape=(32, 2)


In [None]:
print("test loss:", test_scores[0])
print("test")

### Below here is from link at the top of the notebook

change user and movie IDs to be sequential integers beginning at 0

In [9]:
user_enc = LabelEncoder()
ratings['user'] = user_enc.fit_transform(ratings['user_id'].values)
n_users = ratings['user'].nunique()
n_users

2390

In [10]:
item_enc = LabelEncoder()
ratings['movie'] = item_enc.fit_transform(ratings['movie_id'].values)
n_movies = ratings['movie'].nunique()
n_movies

1648

In [11]:
ratings['rating_score'].unique()

array([4, 2, 3, 5, 1], dtype=int8)

In [12]:
min_rating = min(ratings['rating_score'])
max_rating = max(ratings['rating_score'])

In [13]:
ratings.head()

Unnamed: 0,movie_id,rating_score,user_id,user,movie
0,287,4,70807566,1671,122
1,3394,4,4650066,98,595
2,115,4,36549717,893,18
3,2430,4,88582005,2115,491
4,3394,4,33074282,811,595


creating a validation and a test set.

In [14]:
X = ratings[['user', 'movie']].values
y = ratings['rating_score'].values

In [15]:
X_train, X_split, y_train, y_split =  train_test_split(X, y, test_size=.4, random_state=0)

In [16]:
X_validate, X_test, y_validate, y_test = train_test_split(X_split, y_split, test_size=.5, random_state=0)

In [17]:
X_train.shape, X_validate.shape, X_test.shape

((1500, 2), (500, 2), (500, 2))

In [18]:
y_train.shape, y_validate.shape, y_test.shape

((1500,), (500,), (500,))

In [19]:
X_train_array = [X_train[:, 0], X_train[:, 1]]
X_val_array = [X_validate[:, 0], X_validate[:, 1]]

In [20]:
n_factors = 50

In [21]:
import tensorflow as tf

In [None]:
# model = tf.keras.models.Sequential([
#   tf.keras.layers.InputLayer(input_shape=(23279,)),
#   tf.keras.layers.Dense(23279, activation='relu'),
#   tf.keras.layers.Dropout(0.2),
# #   tf.keras.layers.Dense(10)
# ])

In [None]:
# opt = tf.keras.optimizers.Adam(learning_rate=.1)
# model.compile(loss = 'mean_squared_error', optimizer=opt)

In [22]:
from tensorflow.keras.layers import Concatenate, Dense, Dropout, Input, Embedding, Reshape, Dot
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [23]:
from tensorflow.keras.layers import Add, Activation, Lambda
class EmbeddingLayer:
    def __init__(self, n_items, n_factors):
        self.n_items = n_items
        self.n_factors = n_factors
    
    def __call__(self, x):
        x = Embedding(self.n_items, self.n_factors, embeddings_initializer='he_normal',
                      embeddings_regularizer=l2(1e-6))(x)
        x = Reshape((self.n_factors,))(x)
        return x

In [24]:
def RecommenderNet(n_users, n_movies, n_factors, min_rating, max_rating):
    user = Input(shape=(1,))
    u = EmbeddingLayer(n_users, n_factors)(user)
    
    movie = Input(shape=(1,))
    m = EmbeddingLayer(n_movies, n_factors)(movie)
    
    x = Concatenate()([u, m])
    x = Dropout(0.05)(x)
    
    x = Dense(10, kernel_initializer='he_normal')(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    
    x = Dense(1, kernel_initializer='he_normal')(x)
    x = Activation('sigmoid')(x)
    x = Lambda(lambda x: x * (max_rating - min_rating) + min_rating)(x)    
    model = Model(inputs=[user, movie], outputs=x)
    opt = Adam(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)    
    
    return model

In [25]:
model = RecommenderNet(n_users, n_movies, n_factors, min_rating, max_rating)

In [26]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 1, 50)        119500      input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 50)        82400       input_2[0][0]                    
______________________________________________________________________________________________

In [51]:
fitting = model.fit(x=X_train_array, y=y_train, batch_size=64, epochs=5, verbose=1,
                   validation_data=(X_val_array, y_validate))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
