# Deep Neural Collaborative Filtering

Use concatenate and dot product to build the deep recommender model

In [1]:
import sys
sys.path.append("../")

import warnings
warnings.filterwarnings("ignore")

In [2]:
import numpy as np
import pandas as pd
from IPython.display import SVG, display
import matplotlib.pyplot as plt
import seaborn as sns
from reco.preprocess import encode_user_item, random_split, user_split

ModuleNotFoundError: No module named 'reco'

In [None]:
%matplotlib inline

### Prepare the data

In [None]:
df_ratings = pd.read_csv("data/ratings.csv")
df_items = pd.read_csv("data/items.csv")

In [None]:
# Data Encoding
DATA, user_encoder, item_encoder = encode_user_item(df_ratings, "user_id", "movie_id", "rating", "unix_timestamp")

Number of users:  943
Number of items:  1682


In [None]:
DATA.head()

Unnamed: 0,user_id,movie_id,RATING,TIMESTAMP,USER,ITEM
0,196,242,3,881250949,195,241
1,186,302,3,891717742,185,301
2,22,377,1,878887116,21,376
3,244,51,2,880606923,243,50
4,166,346,1,886397596,165,345


In [None]:
n_users = DATA.USER.nunique()
n_items = DATA.ITEM.nunique()
n_users, n_items

(943, 1682)

In [None]:
max_rating = DATA.RATING.max()
min_rating = DATA.RATING.min()
min_rating, max_rating

(1, 5)

In [None]:
# Data Splitting
#train, val, test = user_split(DATA, [0.6, 0.2, 0.2])
train, test = user_split(DATA, [0.8, 0.2])

In [None]:
train.shape, test.shape

((80000, 7), (20000, 7))

## Deep Neural CF

This is a model with both dot and concatenate layers

In [None]:
pip install keras

Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [None]:
# pip install reco

Collecting reco
  Using cached reco-0.2.1.tar.gz (1.4 MB)
[31m    ERROR: Command errored out with exit status 1:
     command: /usr/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-u8hkb4vy/reco/setup.py'"'"'; __file__='"'"'/tmp/pip-install-u8hkb4vy/reco/setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' egg_info --egg-base /tmp/pip-install-u8hkb4vy/reco/pip-egg-info
         cwd: /tmp/pip-install-u8hkb4vy/reco/
    Complete output (3 lines):
    You need Cython too :(.
     Use pip install cython.
    No more requirements, promise!
    ----------------------------------------[0m
[31mERROR: Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
# pip install altair

Note: you may need to restart the kernel to use updated packages.


In [None]:
from keras.models import Model
from keras.layers import Input, Embedding, Flatten, Dot, Add, Lambda, Activation, Reshape, Concatenate, Dense
from keras.regularizers import l2
from keras.constraints import non_neg
from keras.utils import plot_model
from keras.utils.vis_utils import model_to_dot
from reco import vis

### Build the Model

In [None]:
def Neural_CF(n_users, n_items, n_factors):
    
    # Item Layer
    item_input = Input(shape=[1], name='Item')
    
    # Item Embedding MF
    item_embedding_mf = Embedding(n_items, n_factors, embeddings_regularizer=l2(1e-6),
                                  embeddings_initializer='he_normal',
                                  name='ItemEmbeddingMF')(item_input)
    item_vec_mf = Flatten(name='FlattenItemMF')(item_embedding_mf)
    
    
    # Item embedding MLP
    item_embedding_mlp = Embedding(n_items, n_factors, embeddings_regularizer=l2(1e-6),
                                embeddings_initializer='he_normal',
                               name='ItemEmbeddingMLP')(item_input)
    item_vec_mlp = Flatten(name='FlattenItemMLP')(item_embedding_mlp)
    

    # User Layer
    user_input = Input(shape=[1], name='User')
    
    # User Embedding MF
    user_embedding_mf = Embedding(n_users, n_factors, embeddings_regularizer=l2(1e-6), 
                                embeddings_initializer='he_normal',
                               name='UserEmbeddingMF')(user_input)
    user_vec_mf = Flatten(name='FlattenUserMF')(user_embedding_mf)
    
    # User Embedding MF
    user_embedding_mlp = Embedding(n_users, n_factors, embeddings_regularizer=l2(1e-6),
                               embeddings_initializer='he_normal',
                               name='UserEmbeddingMLP')(user_input)
    user_vec_mlp = Flatten(name='FlattenUserMLP')(user_embedding_mlp)
    
    # Multiply MF paths
    DotProductMF = Dot(axes=1, name='DotProductMF')([item_vec_mf, user_vec_mf])
    
    # Concat MLP paths
    ConcatMLP = Concatenate(name='ConcatMLP')([item_vec_mlp, user_vec_mlp])
    
    # Use Dense to learn non-linear dense representation
    Dense_1 = Dense(50, name="Dense1")(ConcatMLP)
    Dense_2 = Dense(20, name="Dense2")(Dense_1)

    # Concatenate MF and MLP paths
    Concat = Concatenate(name="ConcatAll")([DotProductMF, Dense_2])
    
    # Use Dense to learn non-linear dense representation
    Pred = Dense(1, name="Pred")(Concat)
    

    # Item Bias
    item_bias = Embedding(n_items, 1, embeddings_regularizer=l2(1e-5), name='ItemBias')(item_input)
    item_bias_vec = Flatten(name='FlattenItemBiasE')(item_bias)

    # User Bias
    user_bias = Embedding(n_users, 1, embeddings_regularizer=l2(1e-5), name='UserBias')(user_input)
    user_bias_vec = Flatten(name='FlattenUserBiasE')(user_bias)

    # Pred with bias added
    PredAddBias = Add(name="AddBias")([Pred, item_bias_vec, user_bias_vec])
    
    
    # Scaling for each user
    y = Activation('sigmoid')(PredAddBias)
    rating_output = Lambda(lambda x: x * (max_rating - min_rating) + min_rating)(y)
    
    # Model Creation
    model = Model([user_input, item_input], rating_output)
    
    # Compile Model
    model.compile(loss='mean_squared_error', optimizer="adam")
    
    return model

In [None]:
n_factors = 40
model = Neural_CF(n_users, n_items, n_factors)

In [None]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Item (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 User (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 ItemEmbeddingMLP (Embedding)   (None, 1, 40)        67280       ['Item[0][0]']                   
                                                                                                  
 UserEmbeddingMLP (Embedding)   (None, 1, 40)        37720       ['User[0][0]']                   
                                                                                            

In [None]:
from reco.utils import create_directory
create_directory("/model-img")

Directory already exists /home/jack/recommendation/MovieLens/model-img


In [None]:
plot_model(model, show_layer_names=True, show_shapes=True, to_file="model-img/Neural-CF.png" )

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


### Train the Model

In [None]:
%%time
output = model.fit([train.USER, train.ITEM], train.RATING, 
                                  batch_size=128, epochs=5, verbose=1, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
CPU times: user 23.4 s, sys: 1.11 s, total: 24.5 s
Wall time: 12.2 s


In [None]:
vis.metrics(output.history)

### Score the Model

In [None]:
score = model.evaluate([test.USER, test.ITEM], test.RATING, verbose=1)
score



0.9890151619911194

### Evaluate the Model

In [None]:
from reco.evaluate import get_embedding, get_predictions, recommend_topk
from reco.evaluate import precision_at_k, recall_at_k, ndcg_at_k

In [None]:
item_embedding_mf = get_embedding(model, "ItemEmbeddingMF")
user_embedding_mf = get_embedding(model, "UserEmbeddingMF")
item_embedding_mlp = get_embedding(model, "ItemEmbeddingMLP")
user_embedding_mlp = get_embedding(model, "UserEmbeddingMLP")

In [None]:
item_embedding = np.mean([item_embedding_mf,item_embedding_mlp], axis=0)
user_embedding = np.mean([user_embedding_mf,user_embedding_mlp], axis=0)

In [None]:
%%time
predictions = get_predictions(model, DATA)

CPU times: user 1min 58s, sys: 4.8 s, total: 2min 3s
Wall time: 1min 43s


In [None]:
predictions.head()

Unnamed: 0,USER,ITEM,RATING_PRED
0,195,241,3.255465
1,195,301,4.097915
2,195,376,1.864258
3,195,50,3.526865
4,195,345,3.997539


In [None]:
%%time
# Recommendation for Top10K
ranking_topk = recommend_topk(model, DATA, train, k=5)

CPU times: user 1min 55s, sys: 4.78 s, total: 2min
Wall time: 1min 40s


In [None]:
eval_precision = precision_at_k(test, ranking_topk, k=10)
eval_recall = recall_at_k(test, ranking_topk, k=10)
eval_ndcg = ndcg_at_k(test, ranking_topk, k=10)

print("NDCG@K:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

NDCG@K:	0.072917
Precision@K:	0.052916
Recall@K:	0.024755


### Get Similar Items

In [None]:
from reco.recommend import get_similar, show_similar

In [None]:
%%time
item_distances, item_similar_indices = get_similar(item_embedding, 5)

CPU times: user 159 ms, sys: 10 µs, total: 159 ms
Wall time: 162 ms


In [None]:
item_similar_indices

array([[   0,  493, 1592, 1543, 1523],
       [   1, 1222, 1122, 1248, 1472],
       [   2,  811, 1181, 1483, 1223],
       ...,
       [1679, 1666, 1642, 1664, 1636],
       [1680, 1669, 1624, 1658, 1664],
       [1681, 1663, 1666, 1642, 1658]])

In [None]:
show_similar(0, item_similar_indices, item_encoder)

ValueError: Number of rows must be a positive integer, not 2.0

<Figure size 2000x1000 with 0 Axes>