Code from https://medium.com/@jdwittenauer/deep-learning-with-keras-recommender-systems-e7b99cb29929

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [3]:
# Load the long-form ratings table (one row per user/movie rating).
# NOTE(review): read_pickle can execute arbitrary code while unpickling; only load
# pickle files that come from a trusted source.
ratings = pd.read_pickle("./Data/moderate_users_longform.pkl")

In [4]:
# Preview the first rows of the freshly loaded ratings frame.
ratings.head()

Unnamed: 0,movie_id,rating_score,user_id
5,1066,4,97262846
8,1066,4,58420503
11,1066,2,15492642
12,1066,4,29755671
15,1066,3,93302487


Re-encode user and movie IDs as sequential integers beginning at 0.

In [5]:
# Map raw user ids onto contiguous integer codes (0 .. n_users - 1); the codes
# are what the user embedding table is indexed by.
user_enc = LabelEncoder().fit(ratings['user_id'].values)
ratings['user'] = user_enc.transform(ratings['user_id'].values)
# One code per fitted class, so the class count equals the number of distinct users.
n_users = len(user_enc.classes_)
n_users

141676

In [6]:
# Map raw movie ids onto contiguous integer codes (0 .. n_movies - 1); the codes
# are what the movie embedding table is indexed by.
item_enc = LabelEncoder().fit(ratings['movie_id'].values)
ratings['movie'] = item_enc.transform(ratings['movie_id'].values)
# One code per fitted class, so the class count equals the number of distinct movies.
n_movies = len(item_enc.classes_)
n_movies

72601

In [7]:
# Show the distinct rating values present in the data.
ratings['rating_score'].unique()

array([4, 2, 3, 5, 1], dtype=int8)

In [25]:
# Bounds of the rating scale; RecommenderNet uses them to rescale its sigmoid output.
# Use the vectorized Series reductions rather than the builtin min()/max(), which
# walk the whole column one element at a time in Python. int() keeps the result a
# plain Python int, which is what iterating the Series with the builtins produced.
min_rating = int(ratings['rating_score'].min())
max_rating = int(ratings['rating_score'].max())

In [9]:
# Re-inspect the frame: it now also carries the encoded `user` and `movie` columns.
ratings.head()

Unnamed: 0,movie_id,rating_score,user_id,user,movie
5,1066,4,97262846,137735,1023
8,1066,4,58420503,83022,1023
11,1066,2,15492642,22070,1023
12,1066,4,29755671,42177,1023
15,1066,3,93302487,132199,1023


Create the training, validation, and test sets (60% / 20% / 20% of the ratings).

In [10]:
# Features: one (user code, movie code) pair per rating. Target: the rating itself.
X = ratings.loc[:, ['user', 'movie']].to_numpy()
y = ratings['rating_score'].to_numpy()

In [11]:
# First split: keep 60% of the rows for training and hold out the other 40%.
# The fixed random_state makes the shuffle reproducible.
X_train, X_split, y_train, y_split =  train_test_split(X, y, test_size=.4, random_state=0)

In [12]:
# Second split: halve the 40% hold-out into equally sized validation and test sets.
X_validate, X_test, y_validate, y_test = train_test_split(X_split, y_split, test_size=.5, random_state=0)

In [13]:
# Check the feature-matrix sizes of the train / validation / test partitions.
X_train.shape, X_validate.shape, X_test.shape

((4118556, 2), (1372852, 2), (1372852, 2))

In [14]:
# The target vectors must have the same row counts as their feature matrices.
y_train.shape, y_validate.shape, y_test.shape

((4118556,), (1372852,), (1372852,))

In [17]:
# The model takes two separate inputs, so hand Keras one array per input:
# column 0 holds the user codes, column 1 the movie codes.
X_train_array = [X_train[:, col] for col in (0, 1)]
X_val_array = [X_validate[:, col] for col in (0, 1)]

In [18]:
# Width of each user / movie embedding vector (passed to EmbeddingLayer as n_factors).
n_factors = 50

In [19]:
import tensorflow as tf

In [None]:
# model = tf.keras.models.Sequential([
#   tf.keras.layers.InputLayer(input_shape=(23279,)),
#   tf.keras.layers.Dense(23279, activation='relu'),
#   tf.keras.layers.Dropout(0.2),
# #   tf.keras.layers.Dense(10)
# ])

In [None]:
# opt = tf.keras.optimizers.Adam(learning_rate=.1)
# model.compile(loss = 'mean_squared_error', optimizer=opt)

In [43]:
from tensorflow.keras.layers import Concatenate, Dense, Dropout, Input, Embedding, Reshape, Dot
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [30]:
from tensorflow.keras.layers import Add, Activation, Lambda
class EmbeddingLayer:
    """Callable that turns integer ids into dense ``n_factors``-wide vectors.

    Every call builds a new Keras ``Embedding`` (He-normal initialised, L2
    penalty of 1e-6 on the embedding weights) followed by a ``Reshape`` to
    ``(n_factors,)``.

    Args:
        n_items: Number of rows in the embedding table.
        n_factors: Size of each embedding vector.
    """

    def __init__(self, n_items, n_factors):
        self.n_items, self.n_factors = n_items, n_factors

    def __call__(self, x):
        """Apply a fresh embedding + reshape to the input tensor ``x``."""
        embedding = Embedding(
            input_dim=self.n_items,
            output_dim=self.n_factors,
            embeddings_initializer='he_normal',
            embeddings_regularizer=l2(1e-6),
        )
        flatten = Reshape((self.n_factors,))
        return flatten(embedding(x))

In [45]:
def RecommenderNet(n_users, n_movies, n_factors, min_rating, max_rating):
    """Build and compile the rating-prediction network.

    User and movie ids are embedded separately, concatenated, and passed
    through a small dense head. The final sigmoid is rescaled so predictions
    lie between ``min_rating`` and ``max_rating``.

    Args:
        n_users: Size of the user embedding table.
        n_movies: Size of the movie embedding table.
        n_factors: Width of each embedding vector.
        min_rating: Lower end of the output range.
        max_rating: Upper end of the output range.

    Returns:
        A Keras ``Model`` with inputs ``[user, movie]``, compiled with mean
        squared error loss and Adam (learning rate 0.001).
    """
    # User branch.
    user = Input(shape=(1,))
    user_vec = EmbeddingLayer(n_users, n_factors)(user)

    # Movie branch.
    movie = Input(shape=(1,))
    movie_vec = EmbeddingLayer(n_movies, n_factors)(movie)

    # Joint representation with light dropout.
    features = Concatenate()([user_vec, movie_vec])
    features = Dropout(0.05)(features)

    # Hidden layer: 10 ReLU units, then heavy dropout.
    hidden = Dense(10, kernel_initializer='he_normal')(features)
    hidden = Activation('relu')(hidden)
    hidden = Dropout(0.5)(hidden)

    # Single sigmoid unit, stretched from (0, 1) onto the rating scale.
    score = Dense(1, kernel_initializer='he_normal')(hidden)
    score = Activation('sigmoid')(score)
    rating_span = max_rating - min_rating
    rating = Lambda(lambda s: s * rating_span + min_rating)(score)

    model = Model(inputs=[user, movie], outputs=rating)
    model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))

    return model

In [46]:
# Build the compiled network for this dataset's user/movie counts and rating bounds.
model = RecommenderNet(n_users, n_movies, n_factors, min_rating, max_rating)

In [48]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_12 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, 1, 50)        7083800     input_11[0][0]                   
__________________________________________________________________________________________________
embedding_8 (Embedding)         (None, 1, 50)        3630050     input_12[0][0]                   
____________________________________________________________________________________________

In [None]:
# Train for 5 epochs in mini-batches of 64, scoring the validation set after each
# epoch; the return value of fit() is kept in `fitting`.
fitting = model.fit(
    x=X_train_array,
    y=y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
    validation_data=(X_val_array, y_validate),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5